From 7bf764e00734830f23c5b185999ce525f7377bc5 Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Fri, 5 Aug 2022 16:54:40 -0500 Subject: [PATCH 01/17] Laid out the basic structure for PytatoCUDAGraphContext --- arraycontext/__init__.py | 5 +- arraycontext/impl/pytato/__init__.py | 226 +++++++++++++++++++++++++++ arraycontext/impl/pytato/compile.py | 107 +++++++++++++ arraycontext/pytest.py | 22 +++ test/test_arraycontext.py | 14 +- 5 files changed, 366 insertions(+), 8 deletions(-) diff --git a/arraycontext/__init__.py b/arraycontext/__init__.py index 06e0b96..e1f5383 100644 --- a/arraycontext/__init__.py +++ b/arraycontext/__init__.py @@ -74,7 +74,8 @@ from .container.traversal import ( from .impl.pyopencl import PyOpenCLArrayContext from .impl.pytato import (PytatoPyOpenCLArrayContext, - PytatoJAXArrayContext) + PytatoJAXArrayContext, + PytatoCUDAGraphArrayContext) from .impl.jax import EagerJAXArrayContext from .pytest import ( @@ -120,7 +121,7 @@ __all__ = ( "outer", "PyOpenCLArrayContext", "PytatoPyOpenCLArrayContext", - "PytatoJAXArrayContext", + "PytatoJAXArrayContext", "PytatoCUDAGraphArrayContext", "EagerJAXArrayContext", "make_loopy_program", diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 8d7e042..2ef81fc 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -11,6 +11,7 @@ Following :mod:`pytato`-based array context are provided: .. autoclass:: PytatoPyOpenCLArrayContext .. autoclass:: PytatoJAXArrayContext +.. autoclass:: PytatoCUDAGraphContext Compiling a Python callable (Internal) @@ -789,4 +790,229 @@ class PytatoJAXArrayContext(_BasePytatoArrayContext): # }}} +# {{{ PytatoJAXArrayContext + +class PytatoCUDAGraphArrayContext(_BasePytatoArrayContext): + """ + An arraycontext that uses :mod:`pytato` to represent the thawed state of + the arrays and compiles the expressions using + :class:`pytato.target.pycuda.CUDAGraphTarget`. + """ + + def __init__(self, + *, compile_trace_callback: Optional[Callable[[Any, str, Any], None]] + = None) -> None: + """ + :arg compile_trace_callback: A function of three arguments + *(what, stage, ir)*, where *what* identifies the object + being compiled, *stage* is a string describing the compilation + pass, and *ir* is an object containing the intermediate + representation. This interface should be considered + unstable. + """ + import pytato as pt + from pycuda.gpuarray import GPUArray + super().__init__(compile_trace_callback=compile_trace_callback) + self.array_types = (pt.Array, GPUArray) + + @property + def _frozen_array_types(self) -> Tuple[Type, ...]: + from pycuda.gpuarray import GPUArray + return (GPUArray, ) + + def _rec_map_container( + self, func: Callable[[Array], Array], array: ArrayOrContainer, + allowed_types: Optional[Tuple[type, ...]] = None, *, + default_scalar: Optional[ScalarLike] = None, + strict: bool = False) -> ArrayOrContainer: + if allowed_types is None: + allowed_types = self.array_types + + def _wrapper(ary): + if isinstance(ary, allowed_types): + return func(ary) + elif np.isscalar(ary): + if default_scalar is None: + return ary + else: + return np.array(ary).dtype.type(default_scalar) + else: + raise TypeError( + f"{type(self).__name__}.{func.__name__[1:]} invoked with " + f"an unsupported array type: got '{type(ary).__name__}', " + f"but expected one of {allowed_types}") + + return rec_map_array_container(_wrapper, array) + + # {{{ ArrayContext interface + + def zeros_like(self, ary): + def _zeros_like(array): + return self.zeros(array.shape, array.dtype) + + return self._rec_map_container(_zeros_like, ary, default_scalar=0) + + def from_numpy(self, array): + import pycuda.gpuarray as gpuarray + import pytato as pt + + def _from_numpy(ary): + return pt.make_data_wrapper(gpuarray.to_gpu(ary)) + + return with_array_context( + self._rec_map_container(_from_numpy, array, (np.ndarray,)), + actx=self) + + def to_numpy(self, array): + def _to_numpy(ary): + return ary.get() + + return with_array_context( + self._rec_map_container(_to_numpy, self.freeze(array)), + actx=None) + + def freeze(self, array): + if np.isscalar(array): + return array + + import pytato as pt + + from pycuda.gpuarray import GPUArray + from arraycontext.container.traversal import rec_keyed_map_array_container + from arraycontext.impl.pytato.compile import _ary_container_key_stringifier + + array_as_dict: Dict[str, Union[GPUArray, pt.Array]] = {} + key_to_frozen_subary: Dict[str, GPUArray] = {} + key_to_pt_arrays: Dict[str, pt.Array] = {} + + def _record_leaf_ary_in_dict(key: Tuple[Any, ...], + ary: Union[GPUArray, pt.Array]) -> None: + key_str = "_ary" + _ary_container_key_stringifier(key) + array_as_dict[key_str] = ary + + rec_keyed_map_array_container(_record_leaf_ary_in_dict, array) + + # {{{ remove any non pytato arrays from array_as_dict + + for key, subary in array_as_dict.items(): + if isinstance(subary, GPUArray): + key_to_frozen_subary[key] = subary.block_until_ready() + elif isinstance(subary, pt.DataWrapper): + # trivial freeze. + key_to_frozen_subary[key] = subary.data.block_until_ready() + elif isinstance(subary, pt.Array): + key_to_pt_arrays[key] = subary + else: + raise TypeError( + f"{type(self).__name__}.freeze invoked with an unsupported " + f"array type: got '{type(subary).__name__}', but expected one " + f"of {self.array_types}") + + # }}} + + pt_dict_of_named_arrays = pt.make_dict_of_named_arrays(key_to_pt_arrays) + transformed_dag = self.transform_dag(pt_dict_of_named_arrays) + pt_prg = pt.generate_cudagraph(transformed_dag) + out_dict = pt_prg() + assert len(set(out_dict) & set(key_to_frozen_subary)) == 0 + + key_to_frozen_subary = { + **key_to_frozen_subary, + **{k: v.block_until_ready() + for k, v in out_dict.items()} + } + + def _to_frozen(key: Tuple[Any, ...], ary) -> GPUArray: + key_str = "_ary" + _ary_container_key_stringifier(key) + return key_to_frozen_subary[key_str] + + return with_array_context( + rec_keyed_map_array_container(_to_frozen, array), + actx=None) + + def thaw(self, array): + import pytato as pt + + def _thaw(ary): + return pt.make_data_wrapper(ary) + + return with_array_context( + self._rec_map_container(_thaw, array, self._frozen_array_types), + actx=self) + + def compile(self, f: Callable[..., Any]) -> Callable[..., Any]: + from .compile import LazilyCUDAGraphCompilingFunctionCaller + return LazilyCUDAGraphCompilingFunctionCaller(self, f) + + def tag(self, tags: ToTagSetConvertible, array): + from pycuda.gpuarray import GPUArray + + def _tag(ary): + if isinstance(ary, GPUArray): + return ary + else: + return ary.tagged(_preprocess_array_tags(tags)) + + return self._rec_map_container(_tag, array) + + def tag_axis(self, iaxis, tags: ToTagSetConvertible, array): + from pycuda.gpuarray import GPUArray + + def _tag_axis(ary): + if isinstance(ary, GPUArray): + return ary + else: + return ary.with_tagged_axis(iaxis, tags) + + return self._rec_map_container(_tag_axis, array) + + # }}} + + # {{{ compilation + + def call_loopy(self, program, **kwargs): + raise NotImplementedError( + "Calling loopy on GPUArray arrays is not supported. Maybe rewrite" + " the loopy kernel as numpy-flavored array operations using" + " ArrayContext.np.") + + def einsum(self, spec, *args, arg_names=None, tagged=()): + import pytato as pt + from pycuda.gpuarray import GPUArray + if arg_names is None: + arg_names = (None,) * len(args) + + def preprocess_arg(name, arg): + if isinstance(arg, GPUArray): + ary = self.thaw(arg) + elif isinstance(arg, pt.Array): + ary = arg + else: + raise TypeError( + f"{type(self).__name__}.einsum invoked with an unsupported " + f"array type: got '{type(arg).__name__}', but expected one " + f"of {self.array_types}") + + if name is not None: + # Tagging Placeholders with naming-related tags is pointless: + # They already have names. It's also counterproductive, as + # multiple placeholders with the same name that are not + # also the same object are not allowed, and this would produce + # a different Placeholder object of the same name. + if (not isinstance(ary, pt.Placeholder) + and not ary.tags_of_type(NameHint)): + ary = ary.tagged(NameHint(name)) + + return ary + + return pt.einsum(spec, *[ + preprocess_arg(name, arg) + for name, arg in zip(arg_names, args) + ]).tagged(_preprocess_array_tags(tagged)) + + def clone(self): + return type(self)() + +# }}} + # vim: foldmethod=marker diff --git a/arraycontext/impl/pytato/compile.py b/arraycontext/impl/pytato/compile.py index 3282328..1d9d42c 100644 --- a/arraycontext/impl/pytato/compile.py +++ b/arraycontext/impl/pytato/compile.py @@ -2,6 +2,7 @@ .. autoclass:: BaseLazilyCompilingFunctionCaller .. autoclass:: LazilyPyOpenCLCompilingFunctionCaller .. autoclass:: LazilyJAXCompilingFunctionCaller +.. autoclass:: LazilyCUDAGraphCompilingFunctionCaller .. autoclass:: CompiledFunction .. autoclass:: FromArrayContextCompile """ @@ -33,6 +34,7 @@ from arraycontext.context import ArrayT from arraycontext.container import ArrayContainer, is_array_container_type from arraycontext.impl.pytato import (_BasePytatoArrayContext, PytatoJAXArrayContext, + PytatoCUDAGraphArrayContext, PytatoPyOpenCLArrayContext) from arraycontext.container.traversal import rec_keyed_map_array_container @@ -506,6 +508,49 @@ class LazilyJAXCompilingFunctionCaller(BaseLazilyCompilingFunctionCaller): return pytato_program, name_in_program_to_tags, name_in_program_to_axes +class LazilyCUDAGraphCompilingFunctionCaller(BaseLazilyCompilingFunctionCaller): + @property + def compiled_function_returning_array_container_class( + self) -> Type["CompiledFunction"]: + return CompiledCUDAGraphFunctionReturningArrayContainer + + @property + def compiled_function_returning_array_class(self) -> Type["CompiledFunction"]: + return CompiledCUDAGraphFunctionReturningArray + + def _dag_to_transformed_pytato_prg(self, dict_of_named_arrays, *, prg_id=None): + if prg_id is None: + prg_id = self.f + + self.actx._compile_trace_callback( + prg_id, "pre_transform_dag", dict_of_named_arrays) + + with ProcessLogger(logger, "transform_dag for '{prg_id}'"): + pt_dict_of_named_arrays = self.actx.transform_dag(dict_of_named_arrays) + + self.actx._compile_trace_callback( + prg_id, "post_transform_dag", pt_dict_of_named_arrays) + + name_in_program_to_tags = { + name: out.tags + for name, out in pt_dict_of_named_arrays._data.items()} + name_in_program_to_axes = { + name: out.axes + for name, out in pt_dict_of_named_arrays._data.items()} + + self.actx._compile_trace_callback( + prg_id, "pre_generate_cudagraph", pt_dict_of_named_arrays) + + with ProcessLogger(logger, f"generate_cudagraph for '{prg_id}'"): + pytato_program = pt.generate_cudagraph( + pt_dict_of_named_arrays, + function_name=_prg_id_to_kernel_name(prg_id)) + + self.actx._compile_trace_callback( + prg_id, "post_generate_cudagraph", pytato_program) + + return pytato_program, name_in_program_to_tags, name_in_program_to_axes + def _args_to_device_buffers(actx, input_id_to_name_in_program, arg_id_to_arg): input_kwargs_for_loopy = {} @@ -732,3 +777,65 @@ class CompiledJAXFunctionReturningArray(CompiledFunction): evt, out_dict = self.pytato_program(**input_kwargs_for_loopy) return self.actx.thaw(out_dict[self.output_name]) + +@dataclass(frozen=True) +class CompiledCUDAGraphFunctionReturningArrayContainer(CompiledFunction): + """ + .. attribute:: output_id_to_name_in_program + + A mapping from output id to the name of + :class:`pytato.array.NamedArray` in + :attr:`CompiledFunction.pytato_program`. Output id is represented by + the key of a leaf array in the array container + :attr:`CompiledFunction.output_template`. + + .. attribute:: output_template + + An instance of :class:`arraycontext.ArrayContainer` that is the return + type of the callable. + """ + actx: PytatoCUDAGraphArrayContext + pytato_program: pt.target.BoundProgram + input_id_to_name_in_program: Mapping[Tuple[Any, ...], str] + output_id_to_name_in_program: Mapping[Tuple[Any, ...], str] + name_in_program_to_tags: Mapping[str, FrozenSet[Tag]] + name_in_program_to_axes: Mapping[str, Tuple[pt.Axis, ...]] + output_template: ArrayContainer + + def __call__(self, arg_id_to_arg) -> ArrayContainer: + input_kwargs_for_loopy = _args_to_device_buffers( + self.actx, self.input_id_to_name_in_program, arg_id_to_arg) + + out_dict = self.pytato_program(**input_kwargs_for_loopy) + + def to_output_template(keys, _): + return self.actx.thaw( + out_dict[self.output_id_to_name_in_program[keys]] + .block_until_ready() + ) + + return rec_keyed_map_array_container(to_output_template, + self.output_template) + + +@dataclass(frozen=True) +class CompiledJAXFunctionReturningArray(CompiledFunction): + """ + .. attribute:: output_name_in_program + + Name of the output array in the program. + """ + actx: PytatoCUDAGraphArrayContext + pytato_program: pt.target.BoundProgram + input_id_to_name_in_program: Mapping[Tuple[Any, ...], str] + output_tags: FrozenSet[Tag] + output_axes: Tuple[pt.Axis, ...] + output_name: str + + def __call__(self, arg_id_to_arg) -> ArrayContainer: + input_kwargs_for_loopy = _args_to_device_buffers( + self.actx, self.input_id_to_name_in_program, arg_id_to_arg) + + evt, out_dict = self.pytato_program(**input_kwargs_for_loopy) + + return self.actx.thaw(out_dict[self.output_name]) \ No newline at end of file diff --git a/arraycontext/pytest.py b/arraycontext/pytest.py index 1eceb49..2aae1ba 100644 --- a/arraycontext/pytest.py +++ b/arraycontext/pytest.py @@ -194,6 +194,27 @@ class _PytestPytatoJaxArrayContextFactory(PytestArrayContextFactory): def __str__(self): return "" +class _PytestPytatoCUDAGraphArrayContextFactory(PytestArrayContextFactory): + def __init__(self, *args, **kwargs): + pass + + @classmethod + def is_available(cls) -> bool: + try: + import pycuda # noqa: F401 + import pytato # noqa: F401 + return True + except ImportError: + return False + + def __call__(self): + from arraycontext import PytatoCUDAGraphArrayContext + import pycuda.autoinit + return PytatoCUDAGraphArrayContext() + + def __str__(self): + return "" + _ARRAY_CONTEXT_FACTORY_REGISTRY: \ Dict[str, Type[PytestArrayContextFactory]] = { @@ -202,6 +223,7 @@ _ARRAY_CONTEXT_FACTORY_REGISTRY: \ _PytestPyOpenCLArrayContextFactoryWithClassAndHostScalars, "pytato:pyopencl": _PytestPytatoPyOpenCLArrayContextFactory, "pytato:jax": _PytestPytatoJaxArrayContextFactory, + "pytato:cudagraph": _PytestPytatoCUDAGraphArrayContextFactory, "eagerjax": _PytestEagerJaxArrayContextFactory, } diff --git a/test/test_arraycontext.py b/test/test_arraycontext.py index 842d108..ae6b038 100644 --- a/test/test_arraycontext.py +++ b/test/test_arraycontext.py @@ -44,7 +44,8 @@ from arraycontext import ( # noqa: F401 from arraycontext.pytest import (_PytestPyOpenCLArrayContextFactoryWithClass, _PytestPytatoPyOpenCLArrayContextFactory, _PytestEagerJaxArrayContextFactory, - _PytestPytatoJaxArrayContextFactory) + _PytestPytatoJaxArrayContextFactory, + _PytestPytatoCUDAGraphArrayContextFactory) import logging @@ -88,11 +89,12 @@ class _PytatoPyOpenCLArrayContextForTestsFactory( pytest_generate_tests = pytest_generate_tests_for_array_contexts([ - _PyOpenCLArrayContextForTestsFactory, - _PyOpenCLArrayContextWithHostScalarsForTestsFactory, - _PytatoPyOpenCLArrayContextForTestsFactory, - _PytestEagerJaxArrayContextFactory, - _PytestPytatoJaxArrayContextFactory, + # _PyOpenCLArrayContextForTestsFactory, + # _PyOpenCLArrayContextWithHostScalarsForTestsFactory, + # _PytatoPyOpenCLArrayContextForTestsFactory, + # _PytestEagerJaxArrayContextFactory, + # _PytestPytatoJaxArrayContextFactory, + _PytestPytatoCUDAGraphArrayContextFactory ]) -- GitLab From e47d0cf91c26c05432a1d672bc99fa37b3c49254 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 5 Aug 2022 18:06:37 -0500 Subject: [PATCH 02/17] Fix fold markers in arraycontext.impl.pytato --- arraycontext/impl/pytato/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 8d7e042..b7ddc39 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -789,4 +789,6 @@ class PytatoJAXArrayContext(_BasePytatoArrayContext): # }}} +# }}} + # vim: foldmethod=marker -- GitLab From e332d72fbe5cb0d697ab90c3420a435bbe5afd38 Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Sat, 6 Aug 2022 14:27:46 -0500 Subject: [PATCH 03/17] modified freeze functionality --- .gitlab-ci.yml | 1 + arraycontext/impl/pytato/__init__.py | 10 ++++++---- arraycontext/impl/pytato/compile.py | 10 +++++++--- arraycontext/pytest.py | 3 ++- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3f7e760..9dd805a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -21,6 +21,7 @@ Python 3 Nvidia Titan V: export PYOPENCL_TEST=nvi:titan build_py_project_in_venv pip install --upgrade "jax[cuda]" -f https://storage.googleapis.com/jax-releases/jax_releases.html + pip install git+https://gitlab.tiker.net/kaushikcfd/pycuda.git@cudagraph#egg=pycuda test_py_project tags: diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 2ef81fc..d589a86 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -792,6 +792,7 @@ class PytatoJAXArrayContext(_BasePytatoArrayContext): # {{{ PytatoJAXArrayContext + class PytatoCUDAGraphArrayContext(_BasePytatoArrayContext): """ An arraycontext that uses :mod:`pytato` to represent the thawed state of @@ -860,7 +861,7 @@ class PytatoCUDAGraphArrayContext(_BasePytatoArrayContext): return pt.make_data_wrapper(gpuarray.to_gpu(ary)) return with_array_context( - self._rec_map_container(_from_numpy, array, (np.ndarray,)), + self._rec_map_container(_from_numpy, array, (np.ndarray,), strict=True), actx=self) def to_numpy(self, array): @@ -896,10 +897,11 @@ class PytatoCUDAGraphArrayContext(_BasePytatoArrayContext): for key, subary in array_as_dict.items(): if isinstance(subary, GPUArray): - key_to_frozen_subary[key] = subary.block_until_ready() + key_to_frozen_subary[key] = subary elif isinstance(subary, pt.DataWrapper): # trivial freeze. - key_to_frozen_subary[key] = subary.data.block_until_ready() + import pycuda.gpuarray as gpuarray + key_to_frozen_subary[key] = gpuarray.to_gpu(subary.data) elif isinstance(subary, pt.Array): key_to_pt_arrays[key] = subary else: @@ -918,7 +920,7 @@ class PytatoCUDAGraphArrayContext(_BasePytatoArrayContext): key_to_frozen_subary = { **key_to_frozen_subary, - **{k: v.block_until_ready() + **{k: v for k, v in out_dict.items()} } diff --git a/arraycontext/impl/pytato/compile.py b/arraycontext/impl/pytato/compile.py index 1d9d42c..f674104 100644 --- a/arraycontext/impl/pytato/compile.py +++ b/arraycontext/impl/pytato/compile.py @@ -551,6 +551,7 @@ class LazilyCUDAGraphCompilingFunctionCaller(BaseLazilyCompilingFunctionCaller): return pytato_program, name_in_program_to_tags, name_in_program_to_axes + def _args_to_device_buffers(actx, input_id_to_name_in_program, arg_id_to_arg): input_kwargs_for_loopy = {} @@ -562,6 +563,9 @@ def _args_to_device_buffers(actx, input_id_to_name_in_program, arg_id_to_arg): elif isinstance(actx, PytatoJAXArrayContext): import jax arg = jax.device_put(arg) + elif isinstance(actx, PytatoCUDAGraphArrayContext): + import pycuda.gpuarray as gpuarray + arg = gpuarray.to_gpu(np.array(arg)) else: raise NotImplementedError(type(actx)) @@ -778,6 +782,7 @@ class CompiledJAXFunctionReturningArray(CompiledFunction): return self.actx.thaw(out_dict[self.output_name]) + @dataclass(frozen=True) class CompiledCUDAGraphFunctionReturningArrayContainer(CompiledFunction): """ @@ -811,7 +816,6 @@ class CompiledCUDAGraphFunctionReturningArrayContainer(CompiledFunction): def to_output_template(keys, _): return self.actx.thaw( out_dict[self.output_id_to_name_in_program[keys]] - .block_until_ready() ) return rec_keyed_map_array_container(to_output_template, @@ -819,7 +823,7 @@ class CompiledCUDAGraphFunctionReturningArrayContainer(CompiledFunction): @dataclass(frozen=True) -class CompiledJAXFunctionReturningArray(CompiledFunction): +class CompiledCUDAGraphFunctionReturningArray(CompiledFunction): """ .. attribute:: output_name_in_program @@ -838,4 +842,4 @@ class CompiledJAXFunctionReturningArray(CompiledFunction): evt, out_dict = self.pytato_program(**input_kwargs_for_loopy) - return self.actx.thaw(out_dict[self.output_name]) \ No newline at end of file + return self.actx.thaw(out_dict[self.output_name]) diff --git a/arraycontext/pytest.py b/arraycontext/pytest.py index 2aae1ba..964dd4b 100644 --- a/arraycontext/pytest.py +++ b/arraycontext/pytest.py @@ -194,6 +194,7 @@ class _PytestPytatoJaxArrayContextFactory(PytestArrayContextFactory): def __str__(self): return "" + class _PytestPytatoCUDAGraphArrayContextFactory(PytestArrayContextFactory): def __init__(self, *args, **kwargs): pass @@ -209,7 +210,7 @@ class _PytestPytatoCUDAGraphArrayContextFactory(PytestArrayContextFactory): def __call__(self): from arraycontext import PytatoCUDAGraphArrayContext - import pycuda.autoinit + import pycuda.autoinit # noqa return PytatoCUDAGraphArrayContext() def __str__(self): -- GitLab From e1d57d6c872b419c6609bc80f50846cc39d159ef Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Sat, 6 Aug 2022 20:10:14 +0000 Subject: [PATCH 04/17] Added pytato@cudagraph --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9dd805a..b59d94e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -22,6 +22,7 @@ Python 3 Nvidia Titan V: build_py_project_in_venv pip install --upgrade "jax[cuda]" -f https://storage.googleapis.com/jax-releases/jax_releases.html pip install git+https://gitlab.tiker.net/kaushikcfd/pycuda.git@cudagraph#egg=pycuda + pip install git+https://github.com/mitkotak/pytato.git@cudagraph#egg=pytato test_py_project tags: -- GitLab From e614abfcd83ff1db23d5a4930f5defa32001df5e Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Sun, 7 Aug 2022 14:46:07 -0500 Subject: [PATCH 05/17] modified CI --- .gitlab-ci.yml | 1 - requirements.txt | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b59d94e..9dd805a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -22,7 +22,6 @@ Python 3 Nvidia Titan V: build_py_project_in_venv pip install --upgrade "jax[cuda]" -f https://storage.googleapis.com/jax-releases/jax_releases.html pip install git+https://gitlab.tiker.net/kaushikcfd/pycuda.git@cudagraph#egg=pycuda - pip install git+https://github.com/mitkotak/pytato.git@cudagraph#egg=pytato test_py_project tags: diff --git a/requirements.txt b/requirements.txt index a4cb402..08d26e9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,4 @@ git+https://github.com/inducer/pyopencl.git#egg=pyopencl git+https://github.com/inducer/islpy.git#egg=islpy git+https://github.com/inducer/loopy.git#egg=loopy -git+https://github.com/inducer/pytato.git#egg=pytato +git+https://github.com/mitkotak/pytato.git@cudagraph#egg=pytato -- GitLab From 7fba6faf836b1e643ce3c2c7c1e55f7b4deb94c5 Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Sun, 7 Aug 2022 14:48:25 -0500 Subject: [PATCH 06/17] changed looopy branch to pycuda_tgt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 08d26e9..d4d3ee9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,5 +5,5 @@ git+https://github.com/inducer/pymbolic.git#egg=pymbolic git+https://github.com/inducer/pyopencl.git#egg=pyopencl git+https://github.com/inducer/islpy.git#egg=islpy -git+https://github.com/inducer/loopy.git#egg=loopy +git+https://gitlab.tiker.net/inducer/loopy.git@pycuda_tgt#egg=loopy git+https://github.com/mitkotak/pytato.git@cudagraph#egg=pytato -- GitLab From 9ab0849638de41f9197f301fc1400691248a49e2 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 7 Aug 2022 21:59:35 -0500 Subject: [PATCH 07/17] Add folding sections in arraycontext.impl.pytato.compile --- arraycontext/impl/pytato/compile.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/arraycontext/impl/pytato/compile.py b/arraycontext/impl/pytato/compile.py index 3282328..27037ac 100644 --- a/arraycontext/impl/pytato/compile.py +++ b/arraycontext/impl/pytato/compile.py @@ -105,6 +105,8 @@ class LeafArrayDescriptor(AbstractInputDescriptor): # }}} +# {{{ utilities + def _ary_container_key_stringifier(keys: Tuple[Any, ...]) -> str: """ Helper for :meth:`BaseLazilyCompilingFunctionCaller.__call__`. Stringifies an @@ -236,6 +238,10 @@ def _get_f_placeholder_args(arg, kw, arg_id_to_name, actx): else: raise NotImplementedError(type(arg)) +# }}} + + +# {{{ BaseLazilyCompilingFunctionCaller @dataclass class BaseLazilyCompilingFunctionCaller: @@ -366,6 +372,10 @@ class BaseLazilyCompilingFunctionCaller: self.program_cache[arg_id_to_descr] = compiled_func return compiled_func(arg_id_to_arg) +# }}} + + +# {{{ LazilyPyOpenCLCompilingFunctionCaller class LazilyPyOpenCLCompilingFunctionCaller(BaseLazilyCompilingFunctionCaller): @property @@ -440,6 +450,8 @@ class LazilyPyOpenCLCompilingFunctionCaller(BaseLazilyCompilingFunctionCaller): return pytato_program, name_in_program_to_tags, name_in_program_to_axes +# }}} + # {{{ preserve back compat @@ -461,6 +473,8 @@ class LazilyCompilingFunctionCaller(LazilyPyOpenCLCompilingFunctionCaller): # }}} +# {{{ LazilyJAXCompilingFunctionCaller + class LazilyJAXCompilingFunctionCaller(BaseLazilyCompilingFunctionCaller): @property def compiled_function_returning_array_container_class( @@ -553,6 +567,10 @@ def _args_to_cl_buffers(actx, input_id_to_name_in_program, arg_id_to_arg): return _args_to_device_buffers(actx, input_id_to_name_in_program, arg_id_to_arg) +# }}} + + +# {{{ compiled function class CompiledFunction(abc.ABC): """ @@ -582,6 +600,10 @@ class CompiledFunction(abc.ABC): """ pass +# }}} + + +# {{{ copmiled pyopencl function @dataclass(frozen=True) class CompiledPyOpenCLFunctionReturningArrayContainer(CompiledFunction): @@ -670,7 +692,10 @@ class CompiledPyOpenCLFunctionReturningArray(CompiledFunction): self.output_axes), tags=self.output_tags)) +# }}} + +# {{{ comiled jax function @dataclass(frozen=True) class CompiledJAXFunctionReturningArrayContainer(CompiledFunction): """ @@ -732,3 +757,7 @@ class CompiledJAXFunctionReturningArray(CompiledFunction): evt, out_dict = self.pytato_program(**input_kwargs_for_loopy) return self.actx.thaw(out_dict[self.output_name]) + +# }}} + +# vim: foldmethod=marker -- GitLab From b5feb0610f9ceb18fed75cd24042cdcd0efaf47d Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 7 Aug 2022 22:09:18 -0500 Subject: [PATCH 08/17] PytatoPyOpenCLArrayContext.freeze: pass allocator to loopy exec --- arraycontext/impl/pytato/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index b7ddc39..afbe7ce 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -423,7 +423,9 @@ class PytatoPyOpenCLArrayContext(_BasePytatoArrayContext): self._dag_transform_cache[normalized_expr]) assert len(pt_prg.bound_arguments) == 0 - evt, out_dict = pt_prg(self.queue, **bound_arguments) + evt, out_dict = pt_prg(self.queue, + allocator=self.allocator, + **bound_arguments) evt.wait() assert len(set(out_dict) & set(key_to_frozen_subary)) == 0 -- GitLab From 06b26136ea89607c50d925729ab0c7080bedb5d3 Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Mon, 8 Aug 2022 18:35:43 -0500 Subject: [PATCH 09/17] uncommented all the tests --- arraycontext/impl/pytato/__init__.py | 5 ++--- arraycontext/impl/pytato/compile.py | 3 ++- test/test_arraycontext.py | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index d589a86..91deb4c 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -790,7 +790,7 @@ class PytatoJAXArrayContext(_BasePytatoArrayContext): # }}} -# {{{ PytatoJAXArrayContext +# {{{ PytatoCUDAGraphArrayContext class PytatoCUDAGraphArrayContext(_BasePytatoArrayContext): @@ -828,7 +828,6 @@ class PytatoCUDAGraphArrayContext(_BasePytatoArrayContext): strict: bool = False) -> ArrayOrContainer: if allowed_types is None: allowed_types = self.array_types - def _wrapper(ary): if isinstance(ary, allowed_types): return func(ary) @@ -914,7 +913,7 @@ class PytatoCUDAGraphArrayContext(_BasePytatoArrayContext): pt_dict_of_named_arrays = pt.make_dict_of_named_arrays(key_to_pt_arrays) transformed_dag = self.transform_dag(pt_dict_of_named_arrays) - pt_prg = pt.generate_cudagraph(transformed_dag) + pt_prg = pt.generate_cudagraph(transformed_dag, show_code=True) out_dict = pt_prg() assert len(set(out_dict) & set(key_to_frozen_subary)) == 0 diff --git a/arraycontext/impl/pytato/compile.py b/arraycontext/impl/pytato/compile.py index f674104..4fb216f 100644 --- a/arraycontext/impl/pytato/compile.py +++ b/arraycontext/impl/pytato/compile.py @@ -842,4 +842,5 @@ class CompiledCUDAGraphFunctionReturningArray(CompiledFunction): evt, out_dict = self.pytato_program(**input_kwargs_for_loopy) - return self.actx.thaw(out_dict[self.output_name]) + import pycuda.gpuarray as gpuarray + return self.actx.thaw(gpuarray.to_gpu(out_dict[self.output_name])) diff --git a/test/test_arraycontext.py b/test/test_arraycontext.py index ae6b038..03b472c 100644 --- a/test/test_arraycontext.py +++ b/test/test_arraycontext.py @@ -89,11 +89,11 @@ class _PytatoPyOpenCLArrayContextForTestsFactory( pytest_generate_tests = pytest_generate_tests_for_array_contexts([ - # _PyOpenCLArrayContextForTestsFactory, - # _PyOpenCLArrayContextWithHostScalarsForTestsFactory, - # _PytatoPyOpenCLArrayContextForTestsFactory, - # _PytestEagerJaxArrayContextFactory, - # _PytestPytatoJaxArrayContextFactory, + _PyOpenCLArrayContextForTestsFactory, + _PyOpenCLArrayContextWithHostScalarsForTestsFactory, + _PytatoPyOpenCLArrayContextForTestsFactory, + _PytestEagerJaxArrayContextFactory, + _PytestPytatoJaxArrayContextFactory, _PytestPytatoCUDAGraphArrayContextFactory ]) -- GitLab From 5549187998188cc01099852d70e7a6438b33d23c Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Mon, 8 Aug 2022 18:47:50 -0500 Subject: [PATCH 10/17] modified requirements to include pycuda_tgt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d4d3ee9..364ad6e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,4 @@ git+https://github.com/inducer/pyopencl.git#egg=pyopencl git+https://github.com/inducer/islpy.git#egg=islpy git+https://gitlab.tiker.net/inducer/loopy.git@pycuda_tgt#egg=loopy -git+https://github.com/mitkotak/pytato.git@cudagraph#egg=pytato +git+https://gitlab.tiker.net/kaushikcfd/pytato.git@cudagraph#egg-pytato -- GitLab From 6871388c371dc2f1e0c86b33466eafcad2cdef2e Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Mon, 8 Aug 2022 18:56:22 -0500 Subject: [PATCH 11/17] modified requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 364ad6e..d3a3425 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,5 +5,5 @@ git+https://github.com/inducer/pymbolic.git#egg=pymbolic git+https://github.com/inducer/pyopencl.git#egg=pyopencl git+https://github.com/inducer/islpy.git#egg=islpy -git+https://gitlab.tiker.net/inducer/loopy.git@pycuda_tgt#egg=loopy +git+https://gitlab.tiker.net/kaushikcfd/loopy.git@pycuda_tgt#egg=loopy git+https://gitlab.tiker.net/kaushikcfd/pytato.git@cudagraph#egg-pytato -- GitLab From 9d7feaea8b84ced0710441db9c55bd84bfea67cc Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 9 Aug 2022 19:20:06 -0500 Subject: [PATCH 12/17] Pass allocator when converting scalar args to dev buffers --- arraycontext/impl/pytato/compile.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arraycontext/impl/pytato/compile.py b/arraycontext/impl/pytato/compile.py index 27037ac..07cb57b 100644 --- a/arraycontext/impl/pytato/compile.py +++ b/arraycontext/impl/pytato/compile.py @@ -527,7 +527,8 @@ def _args_to_device_buffers(actx, input_id_to_name_in_program, arg_id_to_arg): if np.isscalar(arg): if isinstance(actx, PytatoPyOpenCLArrayContext): import pyopencl.array as cla - arg = cla.to_device(actx.queue, np.array(arg)) + arg = cla.to_device(actx.queue, np.array(arg), + allocator=actx.allocator) elif isinstance(actx, PytatoJAXArrayContext): import jax arg = jax.device_put(arg) -- GitLab From 21105fe3fb32e829e221820773046c069cd3a7a4 Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Fri, 12 Aug 2022 21:50:15 -0500 Subject: [PATCH 13/17] changed requirements --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d3a3425..43306fd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,5 +5,6 @@ git+https://github.com/inducer/pymbolic.git#egg=pymbolic git+https://github.com/inducer/pyopencl.git#egg=pyopencl git+https://github.com/inducer/islpy.git#egg=islpy -git+https://gitlab.tiker.net/kaushikcfd/loopy.git@pycuda_tgt#egg=loopy +git+https://gitlab.tiker.net/inducer/loopy.git@pycuda_tgt#egg=loopy +git+https://gitlab.tiker.net/kaushikcfd/pycuda.git@cudagraph#egg-pycuda git+https://gitlab.tiker.net/kaushikcfd/pytato.git@cudagraph#egg-pytato -- GitLab From b1ca7e2d92b161edd9530444635be70c25cadb52 Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Sat, 13 Aug 2022 10:36:34 -0500 Subject: [PATCH 14/17] fixed flake8 --- arraycontext/impl/pytato/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 91deb4c..b3f413b 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -828,6 +828,7 @@ class PytatoCUDAGraphArrayContext(_BasePytatoArrayContext): strict: bool = False) -> ArrayOrContainer: if allowed_types is None: allowed_types = self.array_types + def _wrapper(ary): if isinstance(ary, allowed_types): return func(ary) -- GitLab From 4887b829fceff6e4e3c239b6fc5066e8bf4ceaf3 Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Tue, 23 Aug 2022 12:40:53 -0500 Subject: [PATCH 15/17] removing show_code --- arraycontext/impl/pytato/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index bf54ddc..99bef74 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -916,7 +916,7 @@ class PytatoCUDAGraphArrayContext(_BasePytatoArrayContext): pt_dict_of_named_arrays = pt.make_dict_of_named_arrays(key_to_pt_arrays) transformed_dag = self.transform_dag(pt_dict_of_named_arrays) - pt_prg = pt.generate_cudagraph(transformed_dag, show_code=True) + pt_prg = pt.generate_cudagraph(transformed_dag) out_dict = pt_prg() assert len(set(out_dict) & set(key_to_frozen_subary)) == 0 -- GitLab From e7caf37bb2b603c7da849357683ee06414f4f055 Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Sun, 16 Oct 2022 15:54:42 -0500 Subject: [PATCH 16/17] added allocator to init --- arraycontext/impl/pytato/__init__.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 99bef74..06519f5 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -804,7 +804,8 @@ class PytatoCUDAGraphArrayContext(_BasePytatoArrayContext): def __init__(self, *, compile_trace_callback: Optional[Callable[[Any, str, Any], None]] - = None) -> None: + = None, + allocator=None) -> None: """ :arg compile_trace_callback: A function of three arguments *(what, stage, ir)*, where *what* identifies the object @@ -815,8 +816,18 @@ class PytatoCUDAGraphArrayContext(_BasePytatoArrayContext): """ import pytato as pt from pycuda.gpuarray import GPUArray + import pycuda super().__init__(compile_trace_callback=compile_trace_callback) self.array_types = (pt.Array, GPUArray) + if allocator is None: + self.allocator = pycuda.driver.mem_alloc + from warnings import warn + warn("PyCUDAArrayContext created without an allocator on a GPU. " + "This can lead to high numbers of memory allocations. " + "Please consider using a pycuda.autoinit. " + "Run with allocator=False to disable this warning.") + else: + self.allocator = allocator @property def _frozen_array_types(self) -> Tuple[Type, ...]: -- GitLab From 8994e1eecd89c87ff14d0e9cc547480989f27e8e Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Tue, 18 Oct 2022 14:51:11 -0500 Subject: [PATCH 17/17] added allocator option --- arraycontext/impl/pytato/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 06519f5..94d6045 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -822,7 +822,7 @@ class PytatoCUDAGraphArrayContext(_BasePytatoArrayContext): if allocator is None: self.allocator = pycuda.driver.mem_alloc from warnings import warn - warn("PyCUDAArrayContext created without an allocator on a GPU. " + warn("PytatoCUDAGraphArrayContext created without an allocator on a GPU. " "This can lead to high numbers of memory allocations. " "Please consider using a pycuda.autoinit. " "Run with allocator=False to disable this warning.") -- GitLab