diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 30384724ae8f48bd240ac0e6c820da1b26ec921c..1fc8f6a75c2d8366d07446d71d869063aaaf90ca 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -7,22 +7,6 @@ Flake8: except: - tags -Pylint: - script: - # Needed to avoid name shadowing issues when running from source directory. - # Pylint won't find the Cython bits without this - - PROJECT_INSTALL_FLAGS="--editable" - - export PY_EXE=python3 - # Pin to numpy 1.15 - # See https://github.com/PyCQA/pylint/issues/2721 - - EXTRA_INSTALL="Cython pybind11 numpy==1.15 mako matplotlib" - - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-pylint.sh - - ". ./prepare-and-run-pylint.sh volumential test examples benchmarks" - tags: - - python3 - except: - - tags - Python 3 POCL: script: - export PY_EXE=python3 diff --git a/.test-conda-env-py3-macos.yml b/.test-conda-env-py3-macos.yml index 1735795e145892dc4169fdeb502e460e1f7b0154..cca9d5b25425cb8f1b1b028c2f15595aa7ac369b 100644 --- a/.test-conda-env-py3-macos.yml +++ b/.test-conda-env-py3-macos.yml @@ -4,42 +4,42 @@ channels: - defaults dependencies: - git - - c-compiler - - pytest - - pytest-cov - - binutils - - scipy - - conda-forge::numpy - - conda-forge::sympy + - python>=3.8 + - dealii - pocl - - islpy - pyopencl - - python>=3.6 - - symengine=0.3.0 - - python-symengine=0.3.0 + - pyfmmlib + - scipy + - numpy + - sympy + - symengine + - python-symengine - cython + - cgen + - toml + - py + - pluggy + - packaging + - iniconfig + - attrs + - genpy + - colorama + - islpy + - pyrsistent + - h5py + - pyevtk + - pytest + - pytest-cov - cmake - - tbb-devel - - dealii - - gmsh - - python-gmsh - - pyfmmlib - # for OpenMP support in pyfmmlib - - libgfortran>=3.0.1 - - clangdev + # enable openmp on osx - openmp - # for pypvfmm - - openmpi-mpicxx - - fftw - - autoconf - - automake - - libtool + - clangdev - pip - pip: - git+https://gitlab.tiker.net/inducer/loopy - git+https://gitlab.tiker.net/inducer/gmsh_interop - - git+https://gitlab.tiker.net/xywei/boxtree + - git+https://gitlab.tiker.net/inducer/boxtree - git+https://gitlab.tiker.net/inducer/modepy - git+https://gitlab.tiker.net/inducer/meshmode - git+https://gitlab.tiker.net/inducer/pymbolic diff --git a/.test-conda-env-py3.yml b/.test-conda-env-py3.yml index d5797098d4bb1ebe1790126a6776b9d0a03a84da..bbe9d8bc78aa0e31f3159c3f404eaa5943631bd6 100644 --- a/.test-conda-env-py3.yml +++ b/.test-conda-env-py3.yml @@ -4,36 +4,40 @@ channels: - defaults dependencies: - git - - c-compiler - - binutils - - openmp - - scipy - - conda-forge::numpy - - conda-forge::sympy - - pytest - - pytest-cov + - python>=3.8 + - dealii - pocl - - islpy - pyopencl - - python=3 - - symengine=0.3.0 - - python-symengine=0.3.0 - pyfmmlib + - scipy + - numpy + - sympy + - matplotlib + - cgen - cython + - toml + - py + - pluggy + - packaging + - iniconfig + - attrs + - genpy + - colorama + - islpy + - pyrsistent + - h5py + - pyevtk + - pytest + - pytest-cov - cmake - - tbb-devel - - dealii - - gmsh - - python-gmsh - # for pypvfmm - - openmpi-mpicxx - - fftw + - symengine + - python-symengine - pip - pip: - git+https://gitlab.tiker.net/inducer/loopy - git+https://gitlab.tiker.net/inducer/gmsh_interop - - git+https://gitlab.tiker.net/xywei/boxtree + - git+https://gitlab.tiker.net/inducer/boxtree - git+https://gitlab.tiker.net/inducer/modepy - git+https://gitlab.tiker.net/inducer/meshmode - git+https://gitlab.tiker.net/inducer/pymbolic diff --git a/requirements.txt b/requirements.txt index be9ec06fd84f09dc4c5ded3c20e3b053de7ad13f..16f533f24a1af1c57e4b32eb8b04abd83be28b98 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,7 @@ filelock -e git+https://gitlab.tiker.net/inducer/pyvisfile.git#egg=pyvisfile -e git+https://gitlab.tiker.net/inducer/pymbolic.git#egg=pymbolic -e git+https://gitlab.tiker.net/inducer/sumpy.git#egg=sumpy --e git+https://gitlab.tiker.net/inducer/loopy.git#egg=loo.py +-e git+https://gitlab.tiker.net/inducer/loopy.git#egg=loopy -e git+https://gitlab.tiker.net/inducer/modepy.git#egg=modepy -e .[test,doc] diff --git a/setup.py b/setup.py index 8b037de997fa0153013ace3061e836840ce2b883..eeb43f8ffdafebb553cd427ab7a90611c1d4458b 100644 --- a/setup.py +++ b/setup.py @@ -112,7 +112,7 @@ def main(): install_requires=[ "boxtree", "h5py", - "loo.py", + "loopy", "meshmode", "modepy", "pyevtk", diff --git a/volumential/droste.py b/volumential/droste.py index 382fa2fd647b53006ae4b43dfb33f66f4a7c980a..bceed02afa33121c2f0ef68d0bf6b6faca0b5c7a 100644 --- a/volumential/droste.py +++ b/volumential/droste.py @@ -802,6 +802,7 @@ class DrosteFull(DrosteBase): ncpus = multiprocessing.cpu_count() knl = self.get_kernel(**kwargs) knl = lp.split_iname(knl, "icase", ncpus, inner_tag="g.0") + knl = lp.add_inames_for_unused_hw_axes(knl) return knl def call_loopy_kernel(self, queue, **kwargs): @@ -1283,6 +1284,7 @@ class DrosteReduced(DrosteBase): knl = self.get_kernel(**kwargs) knl = lp.join_inames(knl, inames=self.basis_vars, new_iname="func") knl = lp.split_iname(knl, "func", ncpus, inner_tag="g.0") + knl = lp.add_inames_for_unused_hw_axes(knl) return knl def call_loopy_kernel_case(self, queue, base_case_id, **kwargs): diff --git a/volumential/expansion_wrangler_fpnd.py b/volumential/expansion_wrangler_fpnd.py index fe9b059eb497d4cb8add2786b75ab4da2876a5fd..b691f63f4d7edd57cb36c2dd68f229d47f2d88a6 100644 --- a/volumential/expansion_wrangler_fpnd.py +++ b/volumential/expansion_wrangler_fpnd.py @@ -124,8 +124,8 @@ class FPNDSumpyExpansionWrangler( ): """ near_field_table can either one of three things: - 1. a single table, when len(out_kernels) = 1 (single level) - 2. a list of tables, when len(out_kernels) = 1 (multiple levels) + 1. a single table, when len(target_kernels) = 1 (single level) + 2. a list of tables, when len(target_kernels) = 1 (multiple levels) 3. otherwise, a dictionary from kernel.__repr__() to a list of its tables """ @@ -136,16 +136,16 @@ class FPNDSumpyExpansionWrangler( self.near_field_table = {} # list of tables for a single out kernel if isinstance(near_field_table, list): - assert len(self.code.out_kernels) == 1 + assert len(self.code.target_kernels) == 1 self.near_field_table[ - self.code.out_kernels[0].__repr__() + self.code.target_kernels[0].__repr__() ] = near_field_table self.n_tables = len(near_field_table) # single table elif isinstance(near_field_table, NearFieldInteractionTable): - assert len(self.code.out_kernels) == 1 - self.near_field_table[self.code.out_kernels[0].__repr__()] = [ + assert len(self.code.target_kernels) == 1 + self.near_field_table[self.code.target_kernels[0].__repr__()] = [ near_field_table ] self.n_tables = 1 @@ -153,7 +153,7 @@ class FPNDSumpyExpansionWrangler( # dictionary of lists of tables elif isinstance(near_field_table, dict): self.n_tables = dict() - for out_knl in self.code.out_kernels: + for out_knl in self.code.target_kernels: if repr(out_knl) not in near_field_table: raise RuntimeError( "Missing nearfield table for %s." % repr(out_knl)) @@ -174,15 +174,15 @@ class FPNDSumpyExpansionWrangler( self.potential_kind = potential_kind # TODO: make all parameters table-specific (allow using inhomogeneous tables) - kname = repr(self.code.out_kernels[0]) + kname = repr(self.code.target_kernels[0]) self.root_table_source_box_extent = ( self.near_field_table[kname][0].source_box_extent) table_starting_level = np.round( np.log(self.tree.root_extent / self.root_table_source_box_extent) / np.log(2) ) - for kid in range(len(self.code.out_kernels)): - kname = self.code.out_kernels[kid].__repr__() + for kid in range(len(self.code.target_kernels)): + kname = self.code.target_kernels[kid].__repr__() for lev, table in zip( range(len(self.near_field_table[kname])), self.near_field_table[kname] @@ -481,11 +481,11 @@ class FPNDSumpyExpansionWrangler( ): pot = self.output_zeros() events = [] - for i in range(len(self.code.out_kernels)): + for i in range(len(self.code.target_kernels)): # print("processing near-field of out_kernel", i) pot[i], evt = self.eval_direct_single_out_kernel( pot[i], - self.code.out_kernels[i], + self.code.target_kernels[i], target_boxes, neighbor_source_boxes_starts, neighbor_source_boxes_lists, @@ -592,12 +592,12 @@ class FPNDFMMLibExpansionWranglerCodeContainer( """ def __init__(self, cl_context, multipole_expansion_factory, local_expansion_factory, - out_kernels, exclude_self=True, *args, **kwargs): + target_kernels, exclude_self=True, *args, **kwargs): self.cl_context = cl_context self.multipole_expansion_factory = multipole_expansion_factory self.local_expansion_factory = local_expansion_factory - self.out_kernels = out_kernels + self.target_kernels = target_kernels self.exclude_self = True def get_wrangler(self, queue, tree, dtype, fmm_level_to_order, @@ -649,14 +649,14 @@ class FPNDFMMLibExpansionWrangler( self.quad_order = quad_order self.potential_kind = potential_kind - # {{{ digest out_kernels + # {{{ digest target_kernels ifgrad = False outputs = [] source_deriv_names = [] k_names = [] - for out_knl in self.code.out_kernels: + for out_knl in self.code.target_kernels: if self.is_supported_helmknl(out_knl): outputs.append(()) @@ -711,16 +711,16 @@ class FPNDFMMLibExpansionWrangler( self.near_field_table = {} # list of tables for a single out kernel if isinstance(near_field_table, list): - assert len(self.code.out_kernels) == 1 + assert len(self.code.target_kernels) == 1 self.near_field_table[ - self.code.out_kernels[0].__repr__() + self.code.target_kernels[0].__repr__() ] = near_field_table self.n_tables = len(near_field_table) # single table elif isinstance(near_field_table, NearFieldInteractionTable): - assert len(self.code.out_kernels) == 1 - self.near_field_table[self.code.out_kernels[0].__repr__()] = [ + assert len(self.code.target_kernels) == 1 + self.near_field_table[self.code.target_kernels[0].__repr__()] = [ near_field_table ] self.n_tables = 1 @@ -728,7 +728,7 @@ class FPNDFMMLibExpansionWrangler( # dictionary of lists of tables elif isinstance(near_field_table, dict): self.n_tables = dict() - for out_knl in self.code.out_kernels: + for out_knl in self.code.target_kernels: if repr(out_knl) not in near_field_table: raise RuntimeError( "Missing nearfield table for %s." % repr(out_knl)) @@ -746,15 +746,15 @@ class FPNDFMMLibExpansionWrangler( raise RuntimeError("Table type unrecognized.") # TODO: make all parameters table-specific (allow using inhomogeneous tables) - kname = repr(self.code.out_kernels[0]) + kname = repr(self.code.target_kernels[0]) self.root_table_source_box_extent = ( self.near_field_table[kname][0].source_box_extent) table_starting_level = np.round( np.log(self.tree.root_extent / self.root_table_source_box_extent) / np.log(2) ) - for kid in range(len(self.code.out_kernels)): - kname = self.code.out_kernels[kid].__repr__() + for kid in range(len(self.code.target_kernels)): + kname = self.code.target_kernels[kid].__repr__() for lev, table in zip( range(len(self.near_field_table[kname])), self.near_field_table[kname] @@ -1090,11 +1090,11 @@ class FPNDFMMLibExpansionWrangler( if pot.dtype != np.object: pot = make_obj_array([pot, ]) events = [] - for i in range(len(self.code.out_kernels)): + for i in range(len(self.code.target_kernels)): # print("processing near-field of out_kernel", i) pot[i], evt = self.eval_direct_single_out_kernel( pot[i], - self.code.out_kernels[i], + self.code.target_kernels[i], target_boxes, neighbor_source_boxes_starts, neighbor_source_boxes_lists, @@ -1107,7 +1107,7 @@ class FPNDFMMLibExpansionWrangler( out_pot.finish() # boxtree.pyfmmlib_integration handles things diffferently - # when out_kernels has only one element + # when target_kernels has only one element if len(pot) == 1: pot = pot[0] diff --git a/volumential/volume_fmm.py b/volumential/volume_fmm.py index 4504f9fbcde3c6b2d6717af384adeab0f2abe529..23685ce34fec52d2162f9230972feb03e9209469 100644 --- a/volumential/volume_fmm.py +++ b/volumential/volume_fmm.py @@ -29,6 +29,7 @@ __doc__ = """ import numpy as np import pyopencl as cl +from pytools.obj_array import make_obj_array from boxtree.fmm import TimingRecorder from volumential.expansion_wrangler_interface import ExpansionWranglerInterface from volumential.expansion_wrangler_fpnd import ( @@ -79,9 +80,19 @@ def drive_volume_fmm(traversal, expansion_wrangler, src_weights, src_func, recorder = TimingRecorder() logger.info("start fmm") + # accept unpacked inputs when doing fmm for just one source field + if src_weights.ndim == 1: + src_weights = make_obj_array([src_weights]) + if src_func.ndim == 1: + src_func = make_obj_array([src_func]) + + assert (ns := len(src_weights)) == len(src_func) + if ns > 1: + raise NotImplementedError("Multiple outputs are not yet supported") + if isinstance(expansion_wrangler, FPNDSumpyExpansionWrangler): - assert isinstance(src_weights, cl.array.Array) - assert isinstance(src_func, cl.array.Array) + assert all(isinstance(sw, cl.array.Array) for sw in src_weights) + assert all(isinstance(sf, cl.array.Array) for sf in src_func) elif isinstance(expansion_wrangler, FPNDFMMLibExpansionWrangler): @@ -94,8 +105,9 @@ def drive_volume_fmm(traversal, expansion_wrangler, src_weights, src_func, if reorder_sources: logger.debug("reorder source weights") - src_weights = wrangler.reorder_sources(src_weights) - src_func = wrangler.reorder_targets(src_func) + for idx_s in range(ns): + src_weights[idx_s] = wrangler.reorder_sources(src_weights[idx_s]) + src_func[idx_s] = wrangler.reorder_targets(src_func[idx_s]) # {{{ Construct local multipoles @@ -130,7 +142,7 @@ def drive_volume_fmm(traversal, expansion_wrangler, src_weights, src_func, traversal.target_boxes, traversal.neighbor_source_boxes_starts, traversal.neighbor_source_boxes_lists, - src_func, + src_func[0], # FIXME: handle multiple source fields ) recorder.add("eval_direct", timing_future) @@ -179,7 +191,7 @@ def drive_volume_fmm(traversal, expansion_wrangler, src_weights, src_func, p2p = P2P( wrangler.queue.context, - wrangler.code.out_kernels, + wrangler.code.target_kernels, exclude_self=wrangler.code.exclude_self, value_dtypes=[wrangler.dtype], )