diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1b302bbb07dcc6199bfc7738fd4f4472238bfa6d..ee8f171cab2973d8bf8c318a8b808d151d982bf1 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,8 +1,15 @@ +# Environment variables +# +# * PYTEST_ADDOPTS is used to filter test runs. The default value is "-k-slowtest", +# which skips the slow running tests. +# * SKIP_EXAMPLES, if non-empty, can be used to skip the examples job. + Python 2.7 POCL: script: - export PY_EXE=python2.7 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="pybind11 scipy numpy mako" + - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} + - export EXTRA_INSTALL="Cython pybind11 numpy scipy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -19,7 +26,8 @@ Python 3 POCL: script: - export PY_EXE=python3 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="pybind11 numpy scipy mako" + - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} + - export EXTRA_INSTALL="Cython pybind11 numpy scipy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -37,7 +45,7 @@ Python 3 POCL Examples: - test -n "$SKIP_EXAMPLES" && exit - export PY_EXE=python3 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="pybind11 numpy mako pyvisfile matplotlib" + - export EXTRA_INSTALL="Cython pybind11 numpy mako pyvisfile matplotlib" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-py-project-and-run-examples.sh - ". ./build-py-project-and-run-examples.sh" tags: @@ -53,8 +61,9 @@ Python 3 POCL Examples: Python 3 Conda: script: - export SUMPY_FORCE_SYMBOLIC_BACKEND=symengine - - CONDA_ENVIRONMENT=.test-conda-env-py3.yml - - REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt + - export CONDA_ENVIRONMENT=.test-conda-env-py3.yml + - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} + - export REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". ./build-and-test-py-project-within-miniconda.sh" tags: @@ -71,9 +80,11 @@ Python 3 Conda Apple: script: - export LC_ALL=en_US.UTF-8 - export LANG=en_US.UTF-8 - - export PYTEST_ADDOPTS=-k-slowtest - - CONDA_ENVIRONMENT=.test-conda-env-py3-macos.yml - - REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt + - export CONDA_ENVIRONMENT=.test-conda-env-py3-macos.yml + - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} + - export REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt + - export CC=gcc + - set -o xtrace - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". ./build-and-test-py-project-within-miniconda.sh" @@ -95,7 +106,7 @@ Python 3 Conda Apple: Documentation: script: - - EXTRA_INSTALL="pybind11 numpy mako" + - EXTRA_INSTALL="Cython pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-docs.sh - ". 
./build-docs.sh" tags: diff --git a/.test-conda-env-py3-macos.yml b/.test-conda-env-py3-macos.yml index b71555f6d6e474a6b0cef0c9d9ed03c168d88a79..37281180b48f654f6c9a2f3d75d0d4298825edb9 100644 --- a/.test-conda-env-py3-macos.yml +++ b/.test-conda-env-py3-macos.yml @@ -13,7 +13,8 @@ dependencies: - python=3.6 - symengine=0.3.0 - python-symengine=0.3.0 +- osx-pocl-opencl - pyfmmlib -# for OpenMP support in pyfmmlib -- libgfortran>=3.0.1 +- gcc +- cython # things not in here: loopy boxtree pymbolic meshmode sumpy diff --git a/.test-conda-env-py3.yml b/.test-conda-env-py3.yml index a0803d620f2a0ff994aca677746813dafaaf27fc..2ac856756e0df70b068c71d725c8999f0eaf3b03 100644 --- a/.test-conda-env-py3.yml +++ b/.test-conda-env-py3.yml @@ -14,4 +14,5 @@ dependencies: - symengine=0.3.0 - python-symengine=0.3.0 - pyfmmlib +- cython # things not in here: loopy boxtree pymbolic meshmode sumpy diff --git a/README.rst b/README.rst index dfc5fe49b46f4a9ea13debdc136e9fbf3848c79d..ad3f79057516a8347148e9e9451aada8a73f553b 100644 --- a/README.rst +++ b/README.rst @@ -20,7 +20,7 @@ It relies on * `boxtree `_ for FMM tree building * `sumpy `_ for expansions and analytical routines * `modepy `_ for modes and nodes on simplices -* `meshmode `_ for modes and nodes on simplices +* `meshmode `_ for high order discretizations * `loopy `_ for fast array operations * `pytest `_ for automated testing diff --git a/doc/misc.rst b/doc/misc.rst index f551887fe53b96ed924be0019188770d820e4d33..9fac9ab446ed3ea3ae1ef7e3b6790537eb90930b 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -11,7 +11,9 @@ This set of instructions is intended for 64-bit Linux and macOS computers. On Debian derivatives (Ubuntu and many more), installing ``build-essential`` should do the trick. - On macOS, run ``xcode-select --install`` to install build tools. + On macOS, run ``xcode-select --install`` to install build tools. On Mojave (10.14), + you may need to follow `these steps `_ + as well. Everywhere else, just making sure you have the ``g++`` package should be enough. @@ -31,11 +33,14 @@ This set of instructions is intended for 64-bit Linux and macOS computers. #. ``conda config --add channels conda-forge`` -#. ``conda install git pip pocl islpy pyopencl sympy pyfmmlib pytest`` +#. (*macOS only*) ``conda install osx-pocl-opencl pocl pyopencl`` -#. Type the following command:: +#. ``conda install gcc cython git pip pocl islpy pyopencl sympy pyfmmlib pytest`` - hash -r; for i in pymbolic cgen genpy gmsh_interop modepy pyvisfile loopy boxtree sumpy meshmode pytential; do python -m pip install git+https://github.com/inducer/$i; done +#. 
Type the following commands:: + + hash -r; for i in pymbolic cgen genpy gmsh_interop modepy pyvisfile loopy boxtree sumpy meshmode; do python -m pip install git+https://github.com/inducer/$i; done + CC=gcc python -m pip install git+https://gitlab.tiker.net/inducer/pytential Next time you want to use :mod:`pytential`, just run the following command:: diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index 832ddcbcf2d2f5caff9f57c4d2fdf525047240f0..29237a198aaa9b20ffd8a6829dd5b55374812dca 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -83,6 +83,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): _from_sep_smaller_crit=None, _from_sep_smaller_min_nsources_cumul=None, _tree_kind="adaptive", + _use_target_specific_qbx=False, geometry_data_inspector=None, fmm_backend="sumpy", target_stick_out_factor=_not_provided): @@ -202,6 +203,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): self._from_sep_smaller_min_nsources_cumul = \ _from_sep_smaller_min_nsources_cumul self._tree_kind = _tree_kind + self._use_target_specific_qbx = _use_target_specific_qbx self.geometry_data_inspector = geometry_data_inspector # /!\ *All* parameters set here must also be set by copy() below, @@ -224,6 +226,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): _box_extent_norm=None, _from_sep_smaller_crit=None, _tree_kind=None, + _use_target_specific_qbx=_not_provided, geometry_data_inspector=None, fmm_backend=None, @@ -306,6 +309,9 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): _from_sep_smaller_min_nsources_cumul=( self._from_sep_smaller_min_nsources_cumul), _tree_kind=_tree_kind or self._tree_kind, + _use_target_specific_qbx=(_use_target_specific_qbx + if _use_target_specific_qbx is not _not_provided + else self._use_target_specific_qbx), geometry_data_inspector=( geometry_data_inspector or self.geometry_data_inspector), fmm_backend=fmm_backend or self.fmm_backend, @@ -745,14 +751,15 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): self.qbx_order, self.fmm_level_to_order, source_extra_kwargs=source_extra_kwargs, - kernel_extra_kwargs=kernel_extra_kwargs) + kernel_extra_kwargs=kernel_extra_kwargs, + _use_target_specific_qbx=self._use_target_specific_qbx) from pytential.qbx.geometry import target_state if (geo_data.user_target_to_center().with_queue(queue) == target_state.FAILED).any().get(): raise RuntimeError("geometry has failed targets") - # {{{ performance data hook + # {{{ geometry data inspection hook if self.geometry_data_inspector is not None: perform_fmm = self.geometry_data_inspector(insn, bound_expr, geo_data) diff --git a/pytential/qbx/fmm.py b/pytential/qbx/fmm.py index badf630046b239303344e1b1a3664e706a12a0f4..7ec5be2375c2f1e6bfef0b5da9828b1761beb2e0 100644 --- a/pytential/qbx/fmm.py +++ b/pytential/qbx/fmm.py @@ -91,12 +91,14 @@ class QBXSumpyExpansionWranglerCodeContainer(SumpyExpansionWranglerCodeContainer def get_wrangler(self, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs={}, - kernel_extra_kwargs=None): + kernel_extra_kwargs=None, + _use_target_specific_qbx=False): return QBXExpansionWrangler(self, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs, - kernel_extra_kwargs) + kernel_extra_kwargs, + _use_target_specific_qbx) class QBXExpansionWrangler(SumpyExpansionWrangler): @@ -120,7 +122,11 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), def __init__(self, code_container, queue, geo_data, dtype, qbx_order, fmm_level_to_order, - source_extra_kwargs, 
kernel_extra_kwargs): + source_extra_kwargs, kernel_extra_kwargs, + _use_target_specific_qbx=False): + if _use_target_specific_qbx: + raise ValueError("TSQBX is not implemented in sumpy") + SumpyExpansionWrangler.__init__(self, code_container, queue, geo_data.tree(), dtype, fmm_level_to_order, source_extra_kwargs, kernel_extra_kwargs) @@ -369,6 +375,10 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), return (pot, SumpyTimingFuture(self.queue, events)) + @log_process(logger) + def eval_target_specific_qbx_locals(self, src_weights): + raise NotImplementedError() + # }}} # }}} @@ -376,7 +386,8 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), # {{{ FMM top-level -def drive_fmm(expansion_wrangler, src_weights, timing_data=None): +def drive_fmm(expansion_wrangler, src_weights, timing_data=None, + traversal=None): """Top-level driver routine for the QBX fast multipole calculation. :arg geo_data: A :class:`QBXFMMGeometryData` instance. @@ -394,8 +405,14 @@ def drive_fmm(expansion_wrangler, src_weights, timing_data=None): wrangler = expansion_wrangler geo_data = wrangler.geo_data - traversal = geo_data.traversal() + + use_tsqbx = geo_data.lpot_source._use_target_specific_qbx + + if traversal is None: + traversal = geo_data.traversal() + tree = traversal.tree + recorder = TimingRecorder() # Interface guidelines: Attributes of the tree are assumed to be known @@ -515,9 +532,12 @@ def drive_fmm(expansion_wrangler, src_weights, timing_data=None): # {{{ wrangle qbx expansions - qbx_expansions, timing_future = wrangler.form_global_qbx_locals(src_weights) + if not use_tsqbx: + qbx_expansions, timing_future = wrangler.form_global_qbx_locals(src_weights) - recorder.add("form_global_qbx_locals", timing_future) + recorder.add("form_global_qbx_locals", timing_future) + else: + qbx_expansions = wrangler.qbx_local_expansion_zeros() local_result, timing_future = ( wrangler.translate_box_multipoles_to_qbx_local(mpole_exps)) @@ -537,6 +557,14 @@ def drive_fmm(expansion_wrangler, src_weights, timing_data=None): recorder.add("eval_qbx_expansions", timing_future) + if use_tsqbx: + tsqbx_result, timing_future = ( + wrangler.eval_target_specific_qbx_locals(src_weights)) + + recorder.add("eval_target_specific_qbx_locals", timing_future) + + qbx_potentials = qbx_potentials + tsqbx_result + # }}} # {{{ reorder potentials diff --git a/pytential/qbx/fmmlib.py b/pytential/qbx/fmmlib.py index 2d21a3d2ac6865d0d8d3c3d3f41c388b0fe160b0..beff36d0dfb50331c4345e59852a54af2fec35d0 100644 --- a/pytential/qbx/fmmlib.py +++ b/pytential/qbx/fmmlib.py @@ -23,11 +23,14 @@ THE SOFTWARE. 
""" import numpy as np -from pytools import memoize_method, Record +from pytools import Record, memoize_method import pyopencl as cl # noqa import pyopencl.array # noqa: F401 from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler -from sumpy.kernel import LaplaceKernel, HelmholtzKernel +from sumpy.kernel import ( + LaplaceKernel, HelmholtzKernel, AxisTargetDerivative, + DirectionalSourceDerivative) +import pytential.qbx.target_specific as ts from boxtree.tools import return_timing_data @@ -55,70 +58,14 @@ class QBXFMMLibExpansionWranglerCodeContainer(object): def get_wrangler(self, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs={}, - kernel_extra_kwargs=None): + kernel_extra_kwargs=None, + _use_target_specific_qbx=False): return QBXFMMLibExpansionWrangler(self, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs, - kernel_extra_kwargs) - -# }}} - - -# {{{ host geo data wrapper - -class ToHostTransferredGeoDataWrapper(object): - def __init__(self, queue, geo_data): - self.queue = queue - self.geo_data = geo_data - - @memoize_method - def tree(self): - return self.traversal().tree - - @memoize_method - def traversal(self): - return self.geo_data.traversal().get(queue=self.queue) - - @property - def ncenters(self): - return self.geo_data.ncenters - - @memoize_method - def centers(self): - return np.array([ - ci.get(queue=self.queue) - for ci in self.geo_data.centers()]) - - @memoize_method - def expansion_radii(self): - return self.geo_data.expansion_radii().get(queue=self.queue) - - @memoize_method - def global_qbx_centers(self): - return self.geo_data.global_qbx_centers().get(queue=self.queue) - - @memoize_method - def qbx_center_to_target_box(self): - return self.geo_data.qbx_center_to_target_box().get(queue=self.queue) - - @memoize_method - def qbx_center_to_target_box_source_level(self, source_level): - return self.geo_data.qbx_center_to_target_box_source_level( - source_level).get(queue=self.queue) - - @memoize_method - def non_qbx_box_target_lists(self): - return self.geo_data.non_qbx_box_target_lists().get(queue=self.queue) - - @memoize_method - def center_to_tree_targets(self): - return self.geo_data.center_to_tree_targets().get(queue=self.queue) - - @memoize_method - def all_targets(self): - """All (not just non-QBX) targets packaged into a single array.""" - return np.array(list(self.tree().targets)) + kernel_extra_kwargs, + _use_target_specific_qbx) # }}} @@ -129,73 +76,80 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): def __init__(self, code, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs, - kernel_extra_kwargs): + kernel_extra_kwargs, + _use_target_specific_qbx=False): self.code = code self.queue = queue + self._use_target_specific_qbx = _use_target_specific_qbx # FMMLib is CPU-only. This wrapper gets the geometry out of # OpenCL-land. 
+ from pytential.qbx.utils import ToHostTransferredGeoDataWrapper self.geo_data = ToHostTransferredGeoDataWrapper(queue, geo_data) self.qbx_order = qbx_order # {{{ digest out_kernels - from sumpy.kernel import AxisTargetDerivative, DirectionalSourceDerivative - - k_names = [] - source_deriv_names = [] - - def is_supported_helmknl(knl): - if isinstance(knl, DirectionalSourceDerivative): - source_deriv_name = knl.dir_vec_name - knl = knl.inner_kernel - else: - source_deriv_name = None - - if isinstance(knl, HelmholtzKernel) and knl.dim in [2, 3]: - k_names.append(knl.helmholtz_k_name) - source_deriv_names.append(source_deriv_name) - return True - elif isinstance(knl, LaplaceKernel) and knl.dim in [2, 3]: - k_names.append(None) - source_deriv_names.append(source_deriv_name) - return True - - return False - ifgrad = False outputs = [] + source_deriv_names = [] + k_names = [] + for out_knl in self.code.out_kernels: - if is_supported_helmknl(out_knl): + if ( + _use_target_specific_qbx + and not self.is_supported_helmknl_for_tsqbx(out_knl)): + raise ValueError("not all kernels passed support TSQBX") + + if self.is_supported_helmknl(out_knl): outputs.append(()) + no_target_deriv_knl = out_knl + elif (isinstance(out_knl, AxisTargetDerivative) - and is_supported_helmknl(out_knl.inner_kernel)): + and self.is_supported_helmknl(out_knl.inner_kernel)): outputs.append((out_knl.axis,)) ifgrad = True + no_target_deriv_knl = out_knl.inner_kernel + else: - raise NotImplementedError( + raise ValueError( "only the 2/3D Laplace and Helmholtz kernel " "and their derivatives are supported") + source_deriv_names.append(no_target_deriv_knl.dir_vec_name + if isinstance(no_target_deriv_knl, DirectionalSourceDerivative) + else None) + + base_knl = out_knl.get_base_kernel() + k_names.append(base_knl.helmholtz_k_name + if isinstance(base_knl, HelmholtzKernel) + else None) + + self.outputs = outputs + from pytools import is_single_valued + if not is_single_valued(source_deriv_names): raise ValueError("not all kernels passed are the same in " "whether they represent a source derivative") source_deriv_name = source_deriv_names[0] - self.outputs = outputs - # }}} + if not is_single_valued(k_names): + raise ValueError("not all kernels passed have the same " + "Helmholtz parameter") + + k_name = k_names[0] - from pytools import single_valued - k_name = single_valued(k_names) if k_name is None: helmholtz_k = 0 else: helmholtz_k = kernel_extra_kwargs[k_name] + # }}} + dipole_vec = None if source_deriv_name is not None: dipole_vec = np.array([ @@ -204,7 +158,6 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): order="F") def inner_fmm_level_to_nterms(tree, level): - from sumpy.kernel import LaplaceKernel, HelmholtzKernel if helmholtz_k == 0: return fmm_level_to_order( LaplaceKernel(tree.dimensions), @@ -225,6 +178,23 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): ifgrad=ifgrad) + @staticmethod + def is_supported_helmknl_for_tsqbx(knl): + # Supports at most one derivative. 
+ if isinstance(knl, (DirectionalSourceDerivative, AxisTargetDerivative)): + knl = knl.inner_kernel + + return (isinstance(knl, (LaplaceKernel, HelmholtzKernel)) + and knl.dim == 3) + + @staticmethod + def is_supported_helmknl(knl): + if isinstance(knl, DirectionalSourceDerivative): + knl = knl.inner_kernel + + return (isinstance(knl, (LaplaceKernel, HelmholtzKernel)) + and knl.dim in (2, 3)) + # {{{ data vector helpers def output_zeros(self): @@ -276,6 +246,13 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): raise ValueError("element '%s' of outputs array not " "understood" % out) + @memoize_method + def _get_single_centers_array(self): + return np.array([ + self.geo_data.centers()[idim] + for idim in range(self.dim) + ], order="F") + # }}} # {{{ override target lists to only hit non-QBX targets @@ -304,6 +281,9 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): @log_process(logger) @return_timing_data def form_global_qbx_locals(self, src_weights): + if self._use_target_specific_qbx: + return self.qbx_local_expansion_zeros() + geo_data = self.geo_data trav = geo_data.traversal() @@ -572,7 +552,7 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): taeval = self.get_expn_eval_routine("ta") - for isrc_center, src_icenter in enumerate(global_qbx_centers): + for src_icenter in global_qbx_centers: for icenter_tgt in range( ctt.starts[src_icenter], ctt.starts[src_icenter+1]): @@ -592,6 +572,60 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): return output + @log_process(logger) + @return_timing_data + def eval_target_specific_qbx_locals(self, src_weights): + if not self._use_target_specific_qbx: + return self.full_output_zeros() + + geo_data = self.geo_data + trav = geo_data.traversal() + + ctt = geo_data.center_to_tree_targets() + + src_weights = src_weights.astype(np.complex128) + + ifcharge = self.dipole_vec is None + ifdipole = self.dipole_vec is not None + + ifpot = any(not output for output in self.outputs) + ifgrad = self.ifgrad + + # Create temporary output arrays for potential / gradient. 
+ pot = np.zeros(self.tree.ntargets, np.complex) if ifpot else None + grad = ( + np.zeros((self.dim, self.tree.ntargets), np.complex) + if ifgrad else None) + + ts.eval_target_specific_qbx_locals( + ifpot=ifpot, + ifgrad=ifgrad, + ifcharge=ifcharge, + ifdipole=ifdipole, + order=self.qbx_order, + sources=self._get_single_sources_array(), + targets=geo_data.all_targets(), + centers=self._get_single_centers_array(), + qbx_centers=geo_data.global_qbx_centers(), + qbx_center_to_target_box=geo_data.qbx_center_to_target_box(), + center_to_target_starts=ctt.starts, + center_to_target_lists=ctt.lists, + source_box_starts=trav.neighbor_source_boxes_starts, + source_box_lists=trav.neighbor_source_boxes_lists, + box_source_starts=self.tree.box_source_starts, + box_source_counts_nonchild=self.tree.box_source_counts_nonchild, + helmholtz_k=self.kernel_kwargs.get("zk", 0), + charge=src_weights, + dipstr=src_weights, + dipvec=self.dipole_vec, + pot=pot, + grad=grad) + + output = self.full_output_zeros() + self.add_potgrad_onto_output(output, slice(None), pot, grad) + + return output + def finalize_potentials(self, potential): potential = super(QBXFMMLibExpansionWrangler, self).finalize_potentials( potential) diff --git a/pytential/qbx/target_assoc.py b/pytential/qbx/target_assoc.py index 11b70c4b2f1cee8db85fd2c00a159a28682d7fa1..5273b72b6fad7927245ede5195ae79ee7d4ea187 100644 --- a/pytential/qbx/target_assoc.py +++ b/pytential/qbx/target_assoc.py @@ -528,9 +528,9 @@ class TargetAssociationWrangler(TreeWranglerBase): return (found_target_close_to_panel == 1).all().get() @log_process(logger) - def try_find_centers(self, tree, peer_lists, lpot_source, - target_status, target_flags, target_assoc, - target_association_tolerance, debug, wait_for=None): + def find_centers(self, tree, peer_lists, lpot_source, + target_status, target_flags, target_assoc, + target_association_tolerance, debug, wait_for=None): # Round up level count--this gets included in the kernel as # a stack bound. Rounding avoids too many kernel versions. from pytools import div_ceil @@ -750,7 +750,7 @@ def associate_targets_to_qbx_centers(lpot_source, wrangler, target_flags = wrangler.make_target_flags(target_discrs_and_qbx_sides) - wrangler.try_find_centers(tree, peer_lists, lpot_source, target_status, + wrangler.find_centers(tree, peer_lists, lpot_source, target_status, target_flags, target_assoc, target_association_tolerance, debug) center_not_found = ( diff --git a/pytential/qbx/target_specific/__init__.py b/pytential/qbx/target_specific/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e6f10bba8c64d193c69110a13b91cac15d4de6ab --- /dev/null +++ b/pytential/qbx/target_specific/__init__.py @@ -0,0 +1,24 @@ +__copyright__ = "Copyright (C) 2018 Matt Wala" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +from .impl import * # noqa diff --git a/pytential/qbx/target_specific/helmholtz_utils.c b/pytential/qbx/target_specific/helmholtz_utils.c new file mode 100644 index 0000000000000000000000000000000000000000..2c06b7fc22863a022edbe296a0830a4a8d7e2500 --- /dev/null +++ b/pytential/qbx/target_specific/helmholtz_utils.c @@ -0,0 +1,515 @@ +/* This file contains routines for evaluating spherical Bessel and Hankel + functions. + + This is based on cdjseval3d.f and helmrouts3d.f from fmmlib3d, translated + with a hacked version of f2c and manually postprocessed. */ + +/* Original copyright notice: */ + +/* ********************************************************************** + +Copyright (c) 2009-2012, Leslie Greengard, Zydrunas Gimbutas +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +********************************************************************** */ + +#include + +/* declarations for f2c generated things */ + +typedef int integer; +typedef double doublereal; +typedef double complex doublecomplex; + +static inline double z_abs(doublecomplex *z) { + return cabs(*z); +} + +static inline void z_exp(doublecomplex *out, doublecomplex *z) { + *out = cexp(*z); +} + +static inline void z_sin(doublecomplex *out, doublecomplex *z) { + *out = csin(*z); +} + +static inline void z_cos(doublecomplex *out, doublecomplex *z) { + *out = ccos(*z); +} + +/* Start of functions borrowed from cdjseval3d.f */ + +/* Computation of spherical Bessel functions via recurrence */ + +/* ********************************************************************** */ +/* Subroutine */ int jfuns3d_(integer *ier, integer *nterms, doublecomplex * + z__, doublereal *scale, doublecomplex *fjs, integer *ifder, + doublecomplex *fjder, integer *lwfjs, integer *iscale, integer *ntop) +{ + /* Initialized data */ + + static doublereal upbound2 = 1e40; + static doublereal upbound2inv = 1e-40; + static doublereal tiny = 1e-200; + static doublereal done = 1.; + static doublereal zero = 0.; + + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + doublecomplex z__1; + + /* Local variables */ + integer i__; + doublereal d0, d1, dd, dc1, dc2; + doublecomplex fj0, fj1, zinv, ztmp; + doublereal dcoef; + doublereal sctot; + doublecomplex zscale; + doublereal scalinv; + +/* ********************************************************************** */ + +/* PURPOSE: */ + +/* This subroutine evaluates the first NTERMS spherical Bessel */ +/* functions and if required, their derivatives. */ +/* It incorporates a scaling parameter SCALE so that */ + +/* fjs_n(z)=j_n(z)/SCALE^n */ +/* fjder_n(z)=\frac{\partial fjs_n(z)}{\partial z} */ + +/* NOTE: The scaling parameter SCALE is meant to be used when */ +/* abs(z) < 1, in which case we recommend setting */ +/* SCALE = abs(z). This prevents the fjs_n from */ +/* underflowing too rapidly. */ +/* Otherwise, set SCALE=1. */ +/* Do not set SCALE = abs(z) if z could take on the */ +/* value zero. */ +/* In an FMM, when forming an expansion from a collection of */ +/* sources, set SCALE = min( abs(k*r), 1) */ +/* where k is the Helmholtz parameter and r is the box dimension */ +/* at the relevant level. */ + +/* INPUT: */ + +/* nterms (integer): order of expansion of output array fjs */ +/* z (complex *16): argument of the spherical Bessel functions */ +/* scale (real *8) : scaling factor (discussed above) */ +/* ifder (integer): flag indicating whether to calculate "fjder" */ +/* 0 NO */ +/* 1 YES */ +/* lwfjs (integer): upper limit of input arrays */ +/* fjs(0:lwfjs) and iscale(0:lwfjs) */ +/* iscale (integer): integer workspace used to keep track of */ +/* internal scaling */ + +/* OUTPUT: */ + +/* ier (integer): error return code */ +/* ier=0 normal return; */ +/* ier=8 insufficient array dimension lwfjs */ +/* fjs (complex *16): array of scaled Bessel functions. */ +/* fjder (complex *16): array of derivs of scaled Bessel functions. */ +/* ntop (integer) : highest index in arrays fjs that is nonzero */ + +/* NOTE, that fjs and fjder arrays must be at least (nterms+2) */ +/* complex *16 elements long. */ + + + + +/* ... Initializing ... 
*/ + + *ier = 0; + +/* set to asymptotic values if argument is sufficiently small */ + + if (z_abs(z__) < tiny) { + fjs[0] = done; + i__1 = *nterms; + for (i__ = 1; i__ <= i__1; ++i__) { + fjs[i__] = zero; + } + + if (*ifder == 1) { + i__1 = *nterms; + for (i__ = 0; i__ <= i__1; ++i__) { + fjder[i__] = zero; + } + fjder[1] = done / (*scale * 3); + } + + return 0; + } + +/* ... Step 1: recursion up to find ntop, starting from nterms */ + + *ntop = 0; + zinv = done / *z__; + fjs[*nterms] = done; + fjs[*nterms - 1] = zero; + + i__1 = *lwfjs; + for (i__ = *nterms; i__ <= i__1; ++i__) { + dcoef = (i__ << 1) + done; + ztmp = dcoef * zinv * fjs[i__] - fjs[i__ - 1]; + fjs[i__ + 1] = ztmp; + +/* Computing 2nd power */ + d__1 = creal(ztmp); +/* Computing 2nd power */ + d__2 = cimag(ztmp); + dd = d__1 * d__1 + d__2 * d__2; + if (dd > upbound2) { + *ntop = i__ + 1; + break; + } + } + if (*ntop == 0) { + *ier = 8; + return 0; + } + +/* ... Step 2: Recursion back down to generate the unscaled jfuns: */ +/* if magnitude exceeds UPBOUND2, rescale and continue the */ +/* recursion (saving the order at which rescaling occurred */ +/* in array iscale. */ + + i__1 = *ntop; + for (i__ = 0; i__ <= i__1; ++i__) { + iscale[i__] = 0; + } + + fjs[*ntop] = zero; + fjs[*ntop - 1] = done; + for (i__ = *ntop - 1; i__ >= 1; --i__) { + dcoef = (i__ << 1) + done; + ztmp = dcoef * zinv * fjs[i__] - fjs[i__ + 1]; + fjs[i__ - 1] = ztmp; + +/* Computing 2nd power */ + d__1 = creal(ztmp); +/* Computing 2nd power */ + d__2 = cimag(ztmp); + dd = d__1 * d__1 + d__2 * d__2; + if (dd > upbound2) { + fjs[i__] *= upbound2inv; + fjs[i__ - 1] *= upbound2inv; + iscale[i__] = 1; + } +/* L2200: */ + } + +/* ... Step 3: go back up to the top and make sure that all */ +/* Bessel functions are scaled by the same factor */ +/* (i.e. the net total of times rescaling was invoked */ +/* on the way down in the previous loop). */ +/* At the same time, add scaling to fjs array. */ + + scalinv = done / *scale; + sctot = 1.; + i__1 = *ntop; + for (i__ = 1; i__ <= i__1; ++i__) { + sctot *= scalinv; + if (iscale[i__ - 1] == 1) { + sctot *= upbound2inv; + } + fjs[i__] *= sctot; + } + +/* ... Determine the normalization parameter: */ + + z_sin(&z__1, z__); + fj0 = z__1 * zinv; + z_cos(&z__1, z__); + fj1 = fj0 * zinv - z__1 * zinv; + + d0 = z_abs(&fj0); + d1 = z_abs(&fj1); + if (d1 > d0) { + zscale = fj1 / (fjs[1] * *scale); + } else { + zscale = fj0 / fjs[0]; + } + +/* ... Scale the jfuns by zscale: */ + + ztmp = zscale; + i__1 = *nterms; + for (i__ = 0; i__ <= i__1; ++i__) { + fjs[i__] *= ztmp; + } + +/* ... Finally, calculate the derivatives if desired: */ + + if (*ifder == 1) { + fjs[*nterms + 1] *= ztmp; + + fjder[0] = -fjs[1] * *scale; + i__1 = *nterms; + for (i__ = 1; i__ <= i__1; ++i__) { + dc1 = i__ / ((i__ << 1) + done); + dc2 = done - dc1; + dc1 *= scalinv; + dc2 *= *scale; + fjder[i__] = dc1 * fjs[i__ - 1] - dc2 * fjs[i__ + 1]; + } + } + return 0; +} /* jfuns3d_ */ + +/* Start of functions borrowed from helmrouts3d.f */ + +/* This file contains the basic subroutines for */ +/* forming and evaluating multipole (partial wave) expansions. */ + +/* Documentation is incomplete and ongoing... */ + + +/* Remarks on scaling conventions. */ + +/* 1) Hankel and Bessel functions are consistently scaled as */ +/* hvec(n)= h_n(z)*scale^(n) */ +/* jvec(n)= j_n(z)/scale^(n) */ + +/* In some earlier FMM implementations, the convention */ +/* hvec(n)= h_n(z)*scale^(n+1) */ +/* was sometimes used, leading to various obscure rescaling */ +/* steps. 
*/ + +/* scale should be of the order of |z| if |z| < 1. Otherwise, */ +/* scale should be set to 1. */ + + +/* 2) There are many definitions of the spherical harmonics, */ +/* which differ in terms of normalization constants. We */ +/* adopt the following convention: */ + +/* For m>0, we define Y_n^m according to */ + +/* Y_n^m = \sqrt{2n+1} \sqrt{\frac{ (n-m)!}{(n+m)!}} \cdot */ +/* P_n^m(\cos \theta) e^{i m phi} */ +/* and */ + +/* Y_n^-m = dconjg( Y_n^m ) */ + +/* We omit the Condon-Shortley phase factor (-1)^m in the */ +/* definition of Y_n^m for m<0. (This is standard in several */ +/* communities.) */ + +/* We also omit the factor \sqrt{\frac{1}{4 \pi}}, so that */ +/* the Y_n^m are orthogonal on the unit sphere but not */ +/* orthonormal. (This is also standard in several communities.) */ +/* More precisely, */ + +/* \int_S Y_n^m Y_n^m d\Omega = 4 \pi. */ + +/* Using our standard definition, the addition theorem takes */ +/* the simple form */ + +/* e^( i k r}/(ikr) = */ +/* \sum_n \sum_m j_n(k|S|) Ylm*(S) h_n(k|T|) Ylm(T) */ + + +/* ----------------------------------------------------------------------- */ +/* h3d01: computes h0, h1 (first two spherical Hankel fns.) */ +/* h3dall: computes Hankel functions of all orders and scales them */ +/* ********************************************************************** */ +/* Subroutine */ static int h3d01_(doublecomplex *z__, doublecomplex *h0, + doublecomplex *h1) +{ + /* Initialized data */ + + static doublecomplex eye = I; + static doublereal thresh = 1e-15; + static doublereal done = 1.; + + /* System generated locals */ + doublecomplex z__1; + + /* Local variables */ + doublecomplex cd; + +/* ********************************************************************** */ + +/* Compute spherical Hankel functions of order 0 and 1 */ + +/* h0(z) = exp(i*z)/(i*z), */ +/* h1(z) = - h0' = -h0*(i-1/z) = h0*(1/z-i) */ + +/* ----------------------------------------------------------------------- */ +/* INPUT: */ + +/* z : argument of Hankel functions */ +/* if abs(z)<1.0d-15, returns zero. */ + +/* ----------------------------------------------------------------------- */ +/* OUTPUT: */ + +/* h0 : h0(z) (spherical Hankel function of order 0). */ +/* h1 : -h0'(z) (spherical Hankel function of order 1). */ + +/* ----------------------------------------------------------------------- */ + + if (z_abs(z__) < thresh) { + *h0 = 0.; + *h1 = 0.; + return 0; + } + +/* Otherwise, use formula */ + + cd = eye * *z__; + z_exp(&z__1, &cd); + *h0 = z__1 / cd; + *h1 = *h0 * (done / *z__ - eye); + + return 0; +} /* h3d01_ */ + + + + +/* ********************************************************************** */ +/* Subroutine */ int h3dall_(integer *nterms, doublecomplex *z__, doublereal * + scale, doublecomplex *hvec, integer *ifder, doublecomplex *hder) +{ + /* Initialized data */ + static doublereal thresh = 1e-15; + static doublereal done = 1.; + + /* Builtin functions */ + double z_abs(doublecomplex *); + + /* Local variables */ + integer i__; + integer i__1; + doublereal dtmp; + doublecomplex zinv, ztmp; + doublereal scal2; + +/* ********************************************************************** */ + +/* This subroutine computes scaled versions of the spherical Hankel */ +/* functions h_n of orders 0 to nterms. */ + +/* hvec(n)= h_n(z)*scale^(n) */ + +/* The parameter SCALE is useful when |z| < 1, in which case */ +/* it damps out the rapid growth of h_n as n increases. 
In such */ +/* cases, we recommend setting */ + +/* scale = |z| */ + +/* or something close. If |z| > 1, set scale = 1. */ + +/* If the flag IFDER is set to one, it also computes the */ +/* derivatives of h_n. */ + +/* hder(n)= h_n'(z)*scale^(n) */ + +/* NOTE: If |z| < 1.0d-15, the subroutine returns zero. */ + +/* ----------------------------------------------------------------------- */ +/* INPUT: */ + +/* nterms : highest order of the Hankel functions to be computed. */ +/* z : argument of the Hankel functions. */ +/* scale : scaling parameter discussed above */ +/* ifder : flag indcating whether derivatives should be computed. */ +/* ifder = 1 ==> compute */ +/* ifder = 0 ==> do not compute */ + +/* ----------------------------------------------------------------------- */ +/* OUTPUT: */ + +/* hvec : the vector of spherical Hankel functions */ +/* hder : the derivatives of the spherical Hankel functions */ + +/* ----------------------------------------------------------------------- */ + + +/* If |z| < thresh, return zeros. */ + + if (z_abs(z__) < thresh) { + i__1 = *nterms; + for (i__ = 0; i__ <= i__1; ++i__) { + hvec[i__] = 0; + hder[i__] = 0; + } + return 0; + } + +/* Otherwise, get h_0 and h_1 analytically and the rest via */ +/* recursion. */ + + h3d01_(z__, hvec, &hvec[1]); + hvec[0] = hvec[0]; + hvec[1] *= *scale; + +/* From Abramowitz and Stegun (10.1.19) */ + +/* h_{n+1}(z)=(2n+1)/z * h_n(z) - h_{n-1}(z) */ + +/* With scaling: */ + +/* hvec(n+1)=scale*(2n+1)/z * hvec(n) -(scale**2) hvec(n-1) */ + + scal2 = *scale * *scale; + zinv = *scale / *z__; + i__1 = *nterms - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dtmp = (i__ << 1) + done; + ztmp = zinv * dtmp; + hvec[i__ + 1] = ztmp * hvec[i__] - scal2 * hvec[i__ - 1]; + } + +/* From Abramowitz and Stegun (10.1.21) */ + +/* h_{n}'(z)= h_{n-1}(z) - (n+1)/z * h_n(z) */ + +/* With scaling: */ + +/* hder(n)=scale* hvec(n-1) - (n+1)/z * hvec(n) */ + + + if (*ifder == 1) { + + hder[0] = -hvec[1] / *scale; + zinv = 1. / *z__; + i__1 = *nterms; + for (i__ = 1; i__ <= i__1; ++i__) { + dtmp = i__ + done; + ztmp = zinv * dtmp; + hder[i__] = *scale * hvec[i__ - 1] - ztmp * hvec[i__]; + } + } + + return 0; +} /* h3dall_ */ diff --git a/pytential/qbx/target_specific/helmholtz_utils.h b/pytential/qbx/target_specific/helmholtz_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..d36b3b1206830f43d31c67551aab9b00dbd80252 --- /dev/null +++ b/pytential/qbx/target_specific/helmholtz_utils.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2018 Matt Wala + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HELMHOLTZ_UTILS_H +#define HELMHOLTZ_UTILS_H + +#include + +extern int jfuns3d_(int *ier, int *nterms, double complex *z, + double *scale, double complex *fjs, int *ifder, + double complex *fjder, int *lwfjs, int *iscale, + int *ntop); + +extern int h3dall_(int *nterms, double complex *z, double *scale, + double complex *hvec, int *ifder, double complex *hder); + +#endif /* HELMHOLTZ_UTILS_H */ diff --git a/pytential/qbx/target_specific/impl.h b/pytential/qbx/target_specific/impl.h new file mode 100644 index 0000000000000000000000000000000000000000..4d7281b4d3e45cafdc74c2243bf2ca5dbca90abd --- /dev/null +++ b/pytential/qbx/target_specific/impl.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2018 Matt Wala + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef IMPL_H +#define IMPL_H + +/* Temporary buffer size for holding e.g. Legendre polynomial values */ +#define BUFSIZE 64 + +/* Padding for false sharing prevention */ +#define PADDING 65 + +#endif /* IMPL_H */ diff --git a/pytential/qbx/target_specific/impl.pyx b/pytential/qbx/target_specific/impl.pyx new file mode 100644 index 0000000000000000000000000000000000000000..37884f3f522af70946d82b5ea07974aa617ace28 --- /dev/null +++ b/pytential/qbx/target_specific/impl.pyx @@ -0,0 +1,786 @@ +# cython: warn.unused=True, warn.unused_arg=True, warn.unreachable=True +# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True +# cython: embedsignature=True, language_level=3 + +"""Copyright (C) 2018 Matt Wala + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import numpy as np +import cython +import cython.parallel + +from libc.math cimport sqrt +from libc.stdio cimport printf, fprintf, stderr +from libc.stdlib cimport abort + +cimport openmp + + +cdef extern from "complex.h" nogil: + double cabs(double complex) + + +cdef extern from "helmholtz_utils.h" nogil: + int jfuns3d_(int *ier, int *nterms, double complex * z, double *scale, + double complex *fjs, int *ifder, double complex *fjder, + int *lwfjs, int *iscale, int *ntop); + int h3dall_(int *nterms, double complex *z, double *scale, + double complex *hvec, int *ifder, double complex *hder); + + +cdef extern from "impl.h" nogil: + const int BUFSIZE + const int PADDING + + +# {{{ (externally visible) wrappers for bessel / hankel functions + +def jfuns3d_wrapper(nterms, z, scale, fjs, fjder): + """Evaluate spherical Bessel functions. + + Arguments: + nterms: Highest order to be computed + z: Argument + scale: Output scaling factor (recommended: min(abs(z), 1)) + fjs: Output array of complex doubles + fjder: *None*, or output array of complex double derivatives + """ + cdef: + double complex[BUFSIZE] fjstemp + double complex[BUFSIZE] fjdertmp + int[BUFSIZE] iscale + int ier, ifder, lwfjs, ntop, i, nterms_ + double scale_ + double complex z_ + + if nterms <= 0: + raise ValueError("nterms should be positive") + + nterms_ = nterms + z_ = z + scale_ = scale + ifder = fjder is not None + lwfjs = BUFSIZE + + jfuns3d_(&ier, &nterms_, &z_, &scale_, fjstemp, &ifder, fjdertmp, &lwfjs, + iscale, &ntop) + + if ier: + raise ValueError("jfuns3d_ returned error code %d" % ier) + + for i in range(1 + nterms): + fjs[i] = fjstemp[i] + if ifder: + fjder[i] = fjdertmp[i] + + +def h3dall_wrapper(nterms, z, scale, hs, hders): + """Evaluate spherical Hankel functions. + + Arguments: + nterms: Highest order to be computed + z: Argument + scale: Output scaling factor (recommended: min(abs(z), 1)) + hs: Output array of complex doubles + hders: *None*, or output array of complex double derivatives + """ + cdef: + int nterms_, ifder + double scale_ + double complex z_ + double complex[:] hvec = np.empty(1 + nterms, np.complex) + double complex[:] hdervec = np.empty(1 + nterms, np.complex) + + ifder = hders is not None + + if nterms <= 0: + raise ValueError("nterms should be positive") + + z_ = z + scale_ = scale + nterms_ = nterms + + h3dall_(&nterms_, &z_, &scale_, &hvec[0], &ifder, &hdervec[0]) + + hs[:1 + nterms] = hvec[:] + if ifder: + hders[:1 + nterms] = hdervec[:] + +# }}} + + +# {{{ helpers + +cdef void legvals(double x, int n, double[] vals, double[] derivs) nogil: + """Compute the values of the Legendre polynomial up to order n at x. + Optionally, if derivs is non-NULL, compute the values of the derivative too. + + Borrowed from fmmlib. 
+ """ + cdef: + double pj, derj, pjm2, pjm1, derjm2, derjm1 + int j + + pjm2 = 1 + pjm1 = x + + vals[0] = 1 + if derivs != NULL: + derivs[0] = 0 + derjm2 = 0 + derjm1 = 1 + + if n == 0: + return + + vals[1] = x + if derivs != NULL: + derivs[1] = 1 + + if n == 1: + return + + for j in range(2, n + 1): + pj = ( (2*j-1)*x*pjm1-(j-1)*pjm2 ) / j + vals[j] = pj + + if derivs != NULL: + derj = (2*j-1)*(pjm1+x*derjm1)-(j-1)*derjm2 + derj = derj / j + derivs[j] = derj + derjm2 = derjm1 + derjm1 = derj + + pjm2 = pjm1 + pjm1 = pj + + +cdef double dist(double[3] a, double[3] b) nogil: + """Calculate the Euclidean distance between a and b.""" + return sqrt( + (a[0] - b[0]) * (a[0] - b[0]) + + (a[1] - b[1]) * (a[1] - b[1]) + + (a[2] - b[2]) * (a[2] - b[2])) + + +cdef void ts_helmholtz_precompute( + double[3] center, + double[3] target, + int order, + int ifder, + double complex k, + double complex[] jvals, + double complex[] jderivs, + double *jscale) nogil: + """Evaluate the source-invariant Bessel terms of the Helmholtz target-specific + expansion.""" + + cdef: + double complex z + double tc_d + int ier, ntop, lwfjs + int[BUFSIZE] iscale + + tc_d = dist(target, center) + jscale[0] = cabs(k * tc_d) if (cabs(k * tc_d) < 1) else 1 + + # Evaluate the spherical Bessel terms. + z = k * tc_d + lwfjs = BUFSIZE + # jfuns3d_ only supports order > 0 (goes out of bounds if order = 0) + order = max(1, order) + jfuns3d_(&ier, &order, &z, jscale, jvals, &ifder, jderivs, &lwfjs, iscale, + &ntop) + if ier: + # This could in theory fail. + fprintf(stderr, "array passed to jfuns3d_ was too small\n") + abort() + +# }}} + + +# {{{ Laplace S + +cdef double complex ts_laplace_s( + double[3] source, + double[3] center, + double[3] target, + double complex charge, + int order) nogil: + """Evaluate the target-specific expansion of the Laplace single-layer kernel.""" + + cdef: + double j + double result, r, sc_d, tc_d, cos_angle + # Legendre recurrence values + double pj, pjm1, pjm2 + + tc_d = dist(target, center) + sc_d = dist(source, center) + + cos_angle = (( + (target[0] - center[0]) * (source[0] - center[0]) + + (target[1] - center[1]) * (source[1] - center[1]) + + (target[2] - center[2]) * (source[2] - center[2])) + / (tc_d * sc_d)) + + if order == 0: + return charge / sc_d + + pjm2 = 1 + pjm1 = cos_angle + + result = 1 / sc_d + (cos_angle * tc_d) / (sc_d * sc_d) + + r = (tc_d * tc_d) / (sc_d * sc_d * sc_d) + + # Invariant: j matches loop counter. Using a double-precision version of the + # loop counter avoids an int-to-double conversion inside the loop. + j = 2. 
+ + for _ in range(2, order + 1): + pj = ( (2.*j-1.)*cos_angle*pjm1-(j-1.)*pjm2 ) / j + result += pj * r + + r *= (tc_d / sc_d) + j += 1 + pjm2 = pjm1 + pjm1 = pj + + return charge * result + +# }}} + + +# {{{ Laplace grad(S) + +cdef void ts_laplace_sp( + double complex[3] grad, + double[3] source, + double[3] center, + double[3] target, + double complex charge, + int order) nogil: + """Evaluate the target-specific expansion of the gradient of the Laplace + single-layer kernel.""" + + cdef: + double[3] grad_tmp + double sc_d, tc_d, cos_angle, Rn + double[BUFSIZE] lvals, lderivs + double[3] smc, tmc + int n + + for m in range(3): + smc[m] = source[m] - center[m] + tmc[m] = target[m] - center[m] + grad_tmp[m] = 0 + + tc_d = dist(target, center) + sc_d = dist(source, center) + + cos_angle = (tmc[0] * smc[0] + tmc[1] * smc[1] + tmc[2] * smc[2]) / (tc_d * sc_d) + legvals(cos_angle, order, lvals, lderivs) + + # Invariant: Rn = tc_d ** (n - 1) / sc_d ** (n + 1) + Rn = 1 / (sc_d * sc_d) + + for n in range(1, 1 + order): + for m in range(3): + grad_tmp[m] += Rn * ( + n * (tmc[m] / tc_d) * lvals[n] + + (smc[m] / sc_d - cos_angle * tmc[m] / tc_d) * lderivs[n]) + Rn *= tc_d / sc_d + + for m in range(3): + grad[m] += charge * grad_tmp[m] + +# }}} + + +# {{{ Laplace D + +cdef double complex ts_laplace_d( + double[3] source, + double[3] center, + double[3] target, + double[3] dipole, + double complex dipstr, + int order) nogil: + """Evaluate the target-specific expansion of the Laplace double-layer kernel.""" + + cdef: + int n, m + double sc_d, tc_d, cos_angle, Rn + double[BUFSIZE] lvals, lderivs + double[3] smc, tmc, grad + + for m in range(3): + smc[m] = source[m] - center[m] + tmc[m] = target[m] - center[m] + grad[m] = 0 + + tc_d = dist(target, center) + sc_d = dist(source, center) + + cos_angle = (tmc[0] * smc[0] + tmc[1] * smc[1] + tmc[2] * smc[2]) / (tc_d * sc_d) + legvals(cos_angle, order, lvals, lderivs) + + # Invariant: Rn = (tc_d ** n / sc_d ** (n + 2)) + Rn = 1 / (sc_d * sc_d) + + for n in range(0, order + 1): + for m in range(3): + grad[m] += Rn * ( + -(n + 1) * (smc[m] / sc_d) * lvals[n] + + (tmc[m] / tc_d - cos_angle * smc[m] / sc_d) * lderivs[n]) + Rn *= (tc_d / sc_d) + + return dipstr * ( + dipole[0] * grad[0] + dipole[1] * grad[1] + dipole[2] * grad[2]) + +# }}} + + +# {{{ Helmholtz S + +cdef double complex ts_helmholtz_s( + double[3] source, + double[3] center, + double[3] target, + double complex charge, + int order, + double complex k, + double complex[] jvals, + double jscale) nogil: + """Evaluate the target-specific expansion of the Helmholtz single-layer + kernel.""" + + cdef: + int n, ifder + double sc_d, tc_d, cos_angle + double[BUFSIZE] lvals + double complex[BUFSIZE] hvals + double hscale, unscale + double complex z, result + + tc_d = dist(target, center) + sc_d = dist(source, center) + + cos_angle = (( + (target[0] - center[0]) * (source[0] - center[0]) + + (target[1] - center[1]) * (source[1] - center[1]) + + (target[2] - center[2]) * (source[2] - center[2])) + / (tc_d * sc_d)) + + # Evaluate the Legendre terms. + legvals(cos_angle, order, lvals, NULL) + + # Scaling magic for Hankel terms. + # These values are taken from the fmmlib documentation. + hscale = cabs(k * sc_d) if (cabs(k * sc_d) < 1) else 1 + # unscale = (jscale / hscale) ** n + # Multiply against unscale to remove the scaling. + unscale = 1 + + # Evaluate the spherical Hankel terms. 
+ z = k * sc_d + ifder = 0 + h3dall_(&order, &z, &hscale, hvals, &ifder, NULL) + + result = 0 + + for n in range(1 + order): + result += (2 * n + 1) * unscale * (jvals[n] * hvals[n] * lvals[n]) + unscale *= jscale / hscale + + return 1j * k * charge * result + +# }}} + + +# {{{ Helmholtz grad(S) + +cdef void ts_helmholtz_sp( + double complex[3] grad, + double[3] source, + double[3] center, + double[3] target, + double complex charge, + int order, + double complex k, + double complex[] jvals, + double complex[] jderivs, + double jscale) nogil: + """Evaluate the target-specific expansion of the gradient of the Helmholtz + single-layer kernel.""" + + cdef: + int n, m + int ifder + double sc_d, tc_d, cos_angle + double[3] smc, tmc + double complex[3] grad_tmp + double[BUFSIZE] lvals, lderivs + double complex z + double complex [BUFSIZE] hvals + double hscale, unscale + + for m in range(3): + smc[m] = source[m] - center[m] + tmc[m] = target[m] - center[m] + grad_tmp[m] = 0 + + tc_d = dist(target, center) + sc_d = dist(source, center) + + # Evaluate the Legendre terms. + cos_angle = (tmc[0] * smc[0] + tmc[1] * smc[1] + tmc[2] * smc[2]) / (tc_d * sc_d) + legvals(cos_angle, order, lvals, lderivs) + + # Scaling magic for Hankel terms. + # These values are taken from the fmmlib documentation. + hscale = cabs(k * sc_d) if (cabs(k * sc_d) < 1) else 1 + # unscale = (jscale / hscale) ** n + # Multiply against unscale to remove the scaling. + unscale = 1 + + # Evaluate the spherical Hankel terms. + z = k * sc_d + ifder = 0 + h3dall_(&order, &z, &hscale, hvals, &ifder, NULL) + + # + # This is a mess, but amounts to the t-gradient of: + # + # __ order + # ik \ (2n + 1) j (k |t - c|) h (k |s - c|) P (cos θ) + # /__ n = 0 n n n + # + # + for n in range(0, order + 1): + for m in range(3): + grad_tmp[m] += (2 * n + 1) * unscale * hvals[n] / tc_d * ( + k * jderivs[n] * lvals[n] * tmc[m] + + (smc[m] / sc_d - cos_angle * tmc[m] / tc_d) + * jvals[n] * lderivs[n]) + unscale *= jscale / hscale + + for m in range(3): + grad[m] += 1j * k * charge * grad_tmp[m] + +# }}} + + +# {{{ Helmholtz D + +cdef double complex ts_helmholtz_d( + double[3] source, + double[3] center, + double[3] target, + double[3] dipole, + double complex dipstr, + int order, + double complex k, + double complex[] jvals, + double jscale) nogil: + """Evaluate the target-specific expansion of the Helmholtz double-layer + kernel.""" + + cdef: + int n, m + int ifder + double sc_d, tc_d, cos_angle + double[3] smc, tmc + double complex[3] grad + double[BUFSIZE] lvals, lderivs + double complex z + double complex [BUFSIZE] hvals, hderivs + double hscale, unscale + + for m in range(3): + smc[m] = source[m] - center[m] + tmc[m] = target[m] - center[m] + grad[m] = 0 + + tc_d = dist(target, center) + sc_d = dist(source, center) + + cos_angle = (tmc[0] * smc[0] + tmc[1] * smc[1] + tmc[2] * smc[2]) / (tc_d * sc_d) + + # Evaluate the Legendre terms. + legvals(cos_angle, order, lvals, lderivs) + + # Scaling magic for Hankel terms. + # These values are taken from the fmmlib documentation. + hscale = cabs(k * sc_d) if (cabs(k * sc_d) < 1) else 1 + # unscale = (jscale / hscale) ** n + # Multiply against unscale to remove the scaling. + unscale = 1 + + # Evaluate the spherical Hankel terms. 
+ z = k * sc_d + ifder = 1 + h3dall_(&order, &z, &hscale, hvals, &ifder, hderivs) + + # + # This is a mess, but amounts to the s-gradient of: + # + # __ order + # ik \ (2n + 1) j (k |t - c|) h (k |s - c|) P (cos θ) + # /__ n = 0 n n n + # + # + for n in range(0, order + 1): + for m in range(3): + grad[m] += (2 * n + 1) * unscale * jvals[n] / sc_d * ( + k * smc[m] * hderivs[n] * lvals[n] + + (tmc[m] / tc_d - cos_angle * smc[m] / sc_d) + * hvals[n] * lderivs[n]) + unscale *= jscale / hscale + + return 1j * k * dipstr * ( + grad[0] * dipole[0] + grad[1] * dipole[1] + grad[2] * dipole[2]) + +# }}} + + +def eval_target_specific_qbx_locals( + int ifpot, + int ifgrad, + int ifcharge, + int ifdipole, + int order, + double[:,:] sources, + double[:,:] targets, + double[:,:] centers, + int[:] qbx_centers, + int[:] qbx_center_to_target_box, + int[:] center_to_target_starts, int[:] center_to_target_lists, + int[:] source_box_starts, int[:] source_box_lists, + int[:] box_source_starts, int[:] box_source_counts_nonchild, + double complex helmholtz_k, + double complex[:] charge, + double complex[:] dipstr, + double[:,:] dipvec, + double complex[:] pot, + double complex[:,:] grad): + """TSQBX entry point. + + Arguments: + ifpot: Flag indicating whether to evaluate the potential + ifgrad: Flag indicating whether to evaluate the gradient of the potential + ifcharge: Flag indicating whether to include monopole sources + ifdipole: Flag indicating whether to include dipole sources + order: Expansion order + sources: Array of sources of shape (3, *nsrcs*) + targets: Array of targets of shape (3, *ntgts*) + centers: Array of centers of shape (3, *nctrs*) + qbx_centers: Array of subset of indices into *centers* which are QBX centers + qbx_center_to_target_box: Array mapping centers to target box numbers + center_to_target_starts: "Start" indices for center-to-target CSR list + center_to_target_lists: Center-to-target CSR list + source_box_starts: "Start" indices for target-box-to-source-box CSR list + source_box_lists: Target-box-to-source-box CSR list + box_source_starts: "Start" indices for sources for each box + box_source_counts_nonchild: Number of sources per box + helmholtz_k: Helmholtz parameter (Pass 0 for Laplace) + charge: (Complex) Source strengths, shape (*nsrcs*,), or *None* + dipstr: (Complex) Dipole source strengths, shape (*nsrcs*,) or *None* + dipvec: (Real) Dipole source orientations, shape (3, *nsrcs*), or *None* + pot: (Complex) Output potential, shape (*ngts*,), or *None* + grad: (Complex) Output gradient, shape (3, *ntgts*), or *None* + """ + + cdef: + int tgt, ictr, ctr + int itgt, itgt_start, itgt_end + int tgt_box, src_ibox + int isrc_box, isrc_box_start, isrc_box_end + int isrc, isrc_start, isrc_end + int tid, m + double jscale + double complex result + double[:,:] source, center, target, dipole + double complex[:,:] result_grad, jvals, jderivs + int laplace_s, helmholtz_s, laplace_sp, helmholtz_sp, laplace_d, helmholtz_d + + # {{{ process arguments + + if ifcharge: + if charge is None: + raise ValueError("Missing charge") + + if ifdipole: + if dipstr is None: + raise ValueError("Missing dipstr") + if dipvec is None: + raise ValueError("Missing dipvec") + + if ifdipole and ifgrad: + raise ValueError("Does not support computing gradient of dipole sources") + + if helmholtz_k == 0: + helmholtz_s = helmholtz_sp = helmholtz_d = 0 + + if ifpot: + laplace_s = ifcharge + laplace_d = ifdipole + + if ifgrad: + laplace_sp = ifcharge + + else: + laplace_s = laplace_sp = laplace_d = 0 + + if ifpot: + 
+            helmholtz_s = ifcharge
+            helmholtz_d = ifdipole
+
+            if ifgrad:
+                helmholtz_sp = ifcharge
+
+    # }}}
+
+    if not any([
+            laplace_s, laplace_sp, laplace_d, helmholtz_s, helmholtz_sp,
+            helmholtz_d]):
+        return
+
+    if qbx_centers.shape[0] == 0:
+        return
+
+    # {{{ set up thread-local storage
+
+    # Hack to obtain thread-local storage
+    maxthreads = openmp.omp_get_max_threads()
+
+    # Prevent false sharing by padding the thread-local buffers
+    source = np.zeros((maxthreads, PADDING))
+    target = np.zeros((maxthreads, PADDING))
+    center = np.zeros((maxthreads, PADDING))
+    dipole = np.zeros((maxthreads, PADDING))
+    result_grad = np.zeros((maxthreads, PADDING), dtype=np.complex)
+    jvals = np.zeros((maxthreads, BUFSIZE + PADDING), dtype=np.complex)
+    jderivs = np.zeros((maxthreads, BUFSIZE + PADDING), dtype=np.complex)
+
+    # TODO: Check that the order is not too high, since temporary
+    # arrays in this module that are limited by BUFSIZE may overflow
+    # if that is the case
+
+    # }}}
+
+    for ictr in cython.parallel.prange(0, qbx_centers.shape[0],
+                                       nogil=True, schedule="static",
+                                       chunksize=128):
+        # Assign to jscale so Cython marks it as private
+        jscale = 0
+        ctr = qbx_centers[ictr]
+        itgt_start = center_to_target_starts[ctr]
+        itgt_end = center_to_target_starts[ctr + 1]
+        tgt_box = qbx_center_to_target_box[ctr]
+        tid = cython.parallel.threadid()
+
+        for m in range(3):
+            center[tid, m] = centers[m, ctr]
+
+        for itgt in range(itgt_start, itgt_end):
+            result = 0
+            tgt = center_to_target_lists[itgt]
+
+            for m in range(3):
+                target[tid, m] = targets[m, tgt]
+                if ifgrad:
+                    result_grad[tid, m] = 0
+
+            if helmholtz_s or helmholtz_sp or helmholtz_d:
+                # Precompute source-invariant Helmholtz terms.
+                ts_helmholtz_precompute(
+                        &center[tid, 0], &target[tid, 0],
+                        order, ifgrad, helmholtz_k, &jvals[tid, 0],
+                        &jderivs[tid, 0], &jscale)
+
+            isrc_box_start = source_box_starts[tgt_box]
+            isrc_box_end = source_box_starts[tgt_box + 1]
+
+            for isrc_box in range(isrc_box_start, isrc_box_end):
+                src_ibox = source_box_lists[isrc_box]
+                isrc_start = box_source_starts[src_ibox]
+                isrc_end = isrc_start + box_source_counts_nonchild[src_ibox]
+
+                for isrc in range(isrc_start, isrc_end):
+
+                    for m in range(3):
+                        source[tid, m] = sources[m, isrc]
+                        if ifdipole:
+                            dipole[tid, m] = dipvec[m, isrc]
+
+                    # NOTE: Don't use +=, since that makes Cython think we are
+                    # doing an OpenMP reduction.
+
+                    # {{{ evaluate potentials
+
+                    if laplace_s:
+                        result = result + (
+                            ts_laplace_s(
+                                &source[tid, 0], &center[tid, 0], &target[tid, 0],
+                                charge[isrc], order))
+
+                    if laplace_sp:
+                        ts_laplace_sp(
+                            &result_grad[tid, 0],
+                            &source[tid, 0], &center[tid, 0], &target[tid, 0],
+                            charge[isrc], order)
+
+                    if laplace_d:
+                        result = result + (
+                            ts_laplace_d(
+                                &source[tid, 0], &center[tid, 0], &target[tid, 0],
+                                &dipole[tid, 0], dipstr[isrc], order))
+
+                    if helmholtz_s:
+                        result = result + (
+                            ts_helmholtz_s(&source[tid, 0], &center[tid, 0],
+                                &target[tid, 0], charge[isrc], order, helmholtz_k,
+                                &jvals[tid, 0], jscale))
+
+                    if helmholtz_sp:
+                        ts_helmholtz_sp(
+                            &result_grad[tid, 0],
+                            &source[tid, 0], &center[tid, 0], &target[tid, 0],
+                            charge[isrc], order, helmholtz_k,
+                            &jvals[tid, 0], &jderivs[tid, 0], jscale)
+
+                    if helmholtz_d:
+                        result = result + (
+                            ts_helmholtz_d(
+                                &source[tid, 0], &center[tid, 0], &target[tid, 0],
+                                &dipole[tid, 0], dipstr[isrc], order, helmholtz_k,
+                                &jvals[tid, 0], jscale))
+
+                    # }}}
+
+            if ifpot:
+                pot[tgt] = result
+
+            if ifgrad:
+                for m in range(3):
+                    grad[m, tgt] = result_grad[tid, m]
diff --git a/pytential/qbx/utils.py b/pytential/qbx/utils.py
index b0ffc066035c0c8f1aea690ecc5dcb2618367275..ca8b08df88a3fd10364a4c317bf779b1b134c7b5 100644
--- a/pytential/qbx/utils.py
+++ b/pytential/qbx/utils.py
@@ -601,4 +601,70 @@ def build_tree_with_qbx_metadata(
 
 # }}}
 
+
+# {{{ host geo data wrapper
+
+class ToHostTransferredGeoDataWrapper(object):
+    """Wraps an instance of :class:`pytential.qbx.geometry.QBXFMMGeometryData`,
+    automatically converting returned OpenCL arrays to host data.
+    """
+
+    def __init__(self, queue, geo_data):
+        self.queue = queue
+        self.geo_data = geo_data
+
+    @memoize_method
+    def tree(self):
+        return self.geo_data.tree().get(queue=self.queue)
+
+    @memoize_method
+    def traversal(self):
+        return self.geo_data.traversal().get(queue=self.queue)
+
+    @property
+    def lpot_source(self):
+        return self.geo_data.lpot_source
+
+    @property
+    def ncenters(self):
+        return self.geo_data.ncenters
+
+    @memoize_method
+    def centers(self):
+        return np.array([
+            ci.get(queue=self.queue)
+            for ci in self.geo_data.centers()])
+
+    @memoize_method
+    def expansion_radii(self):
+        return self.geo_data.expansion_radii().get(queue=self.queue)
+
+    @memoize_method
+    def global_qbx_centers(self):
+        return self.geo_data.global_qbx_centers().get(queue=self.queue)
+
+    @memoize_method
+    def qbx_center_to_target_box(self):
+        return self.geo_data.qbx_center_to_target_box().get(queue=self.queue)
+
+    @memoize_method
+    def qbx_center_to_target_box_source_level(self, source_level):
+        return self.geo_data.qbx_center_to_target_box_source_level(
+            source_level).get(queue=self.queue)
+
+    @memoize_method
+    def non_qbx_box_target_lists(self):
+        return self.geo_data.non_qbx_box_target_lists().get(queue=self.queue)
+
+    @memoize_method
+    def center_to_tree_targets(self):
+        return self.geo_data.center_to_tree_targets().get(queue=self.queue)
+
+    @memoize_method
+    def all_targets(self):
+        """All (not just non-QBX) targets packaged into a single array."""
+        return np.array(list(self.tree().targets))
+
+# }}}
+
 # vim: foldmethod=marker:filetype=pyopencl
diff --git a/setup.py b/setup.py
index e2a1ae90b75867829508b789fbe340e15b29c426..01a241b138f70c77e2b40f9451db72870094189e 100644
--- a/setup.py
+++ b/setup.py
@@ -3,6 +3,8 @@
 import os
 
 from setuptools import setup, find_packages
+from setuptools.extension import Extension
+from Cython.Build import cythonize
 
 # {{{ capture git revision at install time
 
@@ -54,6 +56,21 @@
write_git_revision("pytential") # }}} +ext_modules = [ + Extension( + "pytential.qbx.target_specific.impl", + sources=[ + "pytential/qbx/target_specific/impl.pyx", + "pytential/qbx/target_specific/helmholtz_utils.c"], + depends=[ + "pytential/qbx/target_specific/impl.h", + "pytential/qbx/target_specific/helmholtz_utils.h"], + extra_compile_args=["-Wall", "-fopenmp", "-Ofast"], + extra_link_args=["-fopenmp"] + ), +] + + version_dict = {} init_filename = "pytential/version.py" os.environ["AKPYTHON_EXEC_FROM_WITHIN_WITHIN_SETUP_PY"] = "1" @@ -91,6 +108,8 @@ setup(name="pytential", packages=find_packages(), + ext_modules=cythonize(ext_modules), + install_requires=[ "pytest>=2.3", # FIXME leave out for now diff --git a/test/test_layer_pot.py b/test/test_layer_pot.py index 4b5d70b675871031023d3f5cf40c049c008d0f5b..d6660784042c982162df6cb6f68a5b3670fd21a0 100644 --- a/test/test_layer_pot.py +++ b/test/test_layer_pot.py @@ -183,7 +183,7 @@ def test_off_surface_eval_vs_direct(ctx_getter, do_plot=False): target_association_tolerance=0.05, ).with_refinement() - fplot = FieldPlotter(np.zeros(2), extent=5, npoints=1000) + fplot = FieldPlotter(np.zeros(2), extent=5, npoints=500) from pytential.target import PointsTarget ptarget = PointsTarget(fplot.points) from sumpy.kernel import LaplaceKernel diff --git a/test/test_layer_pot_identity.py b/test/test_layer_pot_identity.py index c376fdf3e5072aba8604147c70a4f5d918c15f33..a43d2caf78b066218d622f39e250e1226d285891 100644 --- a/test/test_layer_pot_identity.py +++ b/test/test_layer_pot_identity.py @@ -233,32 +233,38 @@ class DynamicTestCase(object): if (self.geometry.mesh_name == "sphere" and self.k != 0 and self.fmm_backend == "sumpy"): - pytest.skip("both direct eval and generating the FMM kernels " + raise ValueError("both direct eval and generating the FMM kernels " "are too slow") if (self.geometry.mesh_name == "sphere" and self.expr.zero_op_name == "green_grad"): - pytest.skip("does not achieve sufficient precision") + raise ValueError("does not achieve sufficient precision") # {{{ integral identity tester + +@pytest.mark.slowtest +@pytest.mark.parametrize("case", [ + DynamicTestCase(SphereGeometry(), GreenExpr(), 0), +]) +def test_identity_convergence_slow(ctx_getter, case): + test_identity_convergence(ctx_getter, case) + + @pytest.mark.parametrize("case", [ - tc - for geom in [ - StarfishGeometry(), - SphereGeometry(), - ] - for tc in [ - DynamicTestCase(geom, GreenExpr(), 0), - DynamicTestCase(geom, GreenExpr(), 1.2), - DynamicTestCase(geom, GradGreenExpr(), 0), - DynamicTestCase(geom, GradGreenExpr(), 1.2), - DynamicTestCase(geom, ZeroCalderonExpr(), 0), - - DynamicTestCase(geom, GreenExpr(), 0, fmm_backend="fmmlib"), - DynamicTestCase(geom, GreenExpr(), 1.2, fmm_backend="fmmlib"), - ]]) + # 2d + DynamicTestCase(StarfishGeometry(), GreenExpr(), 0), + DynamicTestCase(StarfishGeometry(), GreenExpr(), 1.2), + DynamicTestCase(StarfishGeometry(), GradGreenExpr(), 0), + DynamicTestCase(StarfishGeometry(), GradGreenExpr(), 1.2), + DynamicTestCase(StarfishGeometry(), ZeroCalderonExpr(), 0), + DynamicTestCase(StarfishGeometry(), GreenExpr(), 0, fmm_backend="fmmlib"), + DynamicTestCase(StarfishGeometry(), GreenExpr(), 1.2, fmm_backend="fmmlib"), + # 3d + DynamicTestCase(SphereGeometry(), GreenExpr(), 0, fmm_backend="fmmlib"), + DynamicTestCase(SphereGeometry(), GreenExpr(), 1.2, fmm_backend="fmmlib") +]) def test_identity_convergence(ctx_getter, case, visualize=False): logging.basicConfig(level=logging.INFO) diff --git a/test/test_target_specific_qbx.py 
b/test/test_target_specific_qbx.py new file mode 100644 index 0000000000000000000000000000000000000000..34091af9a91e26f526ea3f2b371515b0c8992029 --- /dev/null +++ b/test/test_target_specific_qbx.py @@ -0,0 +1,219 @@ +from __future__ import division, absolute_import, print_function + +__copyright__ = "Copyright (C) 2018 Matt Wala" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import numpy as np +import numpy.linalg as la # noqa +import pyopencl as cl +import pyopencl.clmath as clmath +import pytest +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + +from functools import partial # noqa +from meshmode.mesh.generation import ( # noqa + ellipse, cloverleaf, starfish, drop, n_gon, qbx_peanut, WobblyCircle, + NArmedStarfish, + make_curve_mesh) + +from pytential import bind, sym, norm # noqa +from sumpy.kernel import LaplaceKernel, HelmholtzKernel + +import logging +logger = logging.getLogger(__name__) + + +def test_spherical_bessel_functions(): + import pytential.qbx.target_specific as ts + + nterms = 9 + z = 3j + scale = 1 + j = np.zeros(1 + nterms, dtype=np.complex) + jder = np.zeros(1 + nterms, dtype=np.complex) + ts.jfuns3d_wrapper(nterms, z, scale, j, jder) + + # Reference solution computed using scipy.special.spherical_jn + + j_expected = np.array([ + +3.33929164246994992e+00 + +0.00000000000000000e+00j, + +0.00000000000000000e+00 + +2.24279011776926884e+00j, + -1.09650152470070195e+00 + +0.00000000000000000e+00j, + +2.77555756156289135e-17 + -4.15287576601431119e-01j, + +1.27497179297362317e-01 + +0.00000000000000000e+00j, + -3.46944695195361419e-18 + +3.27960387093445271e-02j, + -7.24503736309898075e-03 + -4.33680868994201774e-19j, + +0.00000000000000000e+00 + -1.40087680258226812e-03j, + +2.40653350187633002e-04 + +0.00000000000000000e+00j, + +0.00000000000000000e+00 + +3.71744848523478122e-05j, + ]) + + assert np.allclose(j, j_expected) + + jder_expected = np.array([ + -0.00000000000000000e+00 + -2.24279011776926884e+00j, + +1.84409823062377076e+00 + +0.00000000000000000e+00j, + +0.00000000000000000e+00 + +1.14628859306856690e+00j, + -5.42784755898793825e-01 + +3.70074341541718826e-17j, + +2.77555756156289135e-17 + -2.02792277772493951e-01j, + +6.19051018786732632e-02 + -6.93889390390722838e-18j, + -2.45752492430047685e-18 + +1.58909515287802387e-02j, + -3.50936588954626604e-03 + -4.33680868994201774e-19j, + +0.00000000000000000e+00 + -6.78916752019369197e-04j, + +1.16738400679806980e-04 + +0.00000000000000000e+00j, + ]) + + 
assert np.allclose(jder, jder_expected) + + +def test_spherical_hankel_functions(): + import pytential.qbx.target_specific as ts + + nterms = 9 + z = 2 + 3j + scale = 1 + h = np.zeros(1 + nterms, dtype=np.complex) + hder = np.zeros(1 + nterms, dtype=np.complex) + ts.h3dall_wrapper(nterms, z, scale, h, hder) + + # Reference solution computed using + # scipy.special.spherical_jn + 1j * scipy.special.spherical_yn + h_expected = np.array([ + +1.17460537937623677e-02 + -7.25971518952217565e-03j, + -7.12794888037171503e-03 + -1.55735608522498126e-02j, + -2.58175723285687941e-02 + +5.00665171335734627e-03j, + -6.95481631849959037e-03 + +4.92143379339500253e-02j, + +9.78278544942576822e-02 + +5.92281078069348405e-02j, + +2.65420992601874961e-01 + -1.70387117227806167e-01j, + -8.11750107462848453e-02 + -1.02133651818182791e+00j, + -3.49178056863992792e+00 + -1.62876088689699405e+00j, + -1.36147986022969878e+01 + +9.34959028601928743e+00j, + +4.56300765393887087e+00 + +7.94934376901125432e+01j, + ]) + + assert np.allclose(h, h_expected) + + hder_expected = np.array([ + +7.12794888037171503e-03 + +1.55735608522498126e-02j, + +2.11270661502996893e-02 + -5.75767287207851197e-03j, + +1.32171023895111261e-03 + -3.57580271012700734e-02j, + -6.69663049946767064e-02 + -3.16989251553807527e-02j, + -1.50547136475930293e-01 + +1.16532548652759055e-01j, + +8.87444851771816839e-02 + +5.84014513465967444e-01j, + +2.00269153354544205e+00 + +7.98384884993240895e-01j, + +7.22334424954346144e+00 + -5.46307186102847187e+00j, + -4.05890079026877615e+00 + -4.28512368415405192e+01j, + -2.04081205047078043e+02 + -1.02417988497371837e+02j, + ]) + + assert np.allclose(hder, hder_expected) + + +@pytest.mark.parametrize("op", ["S", "D", "Sp"]) +@pytest.mark.parametrize("helmholtz_k", [0, 1.2, 12 + 1.2j]) +@pytest.mark.parametrize("qbx_order", [0, 1, 5]) +def test_target_specific_qbx(ctx_getter, op, helmholtz_k, qbx_order): + logging.basicConfig(level=logging.INFO) + + cl_ctx = ctx_getter() + queue = cl.CommandQueue(cl_ctx) + + target_order = 4 + fmm_tol = 1e-3 + + from meshmode.mesh.generation import generate_icosphere + mesh = generate_icosphere(1, target_order) + + from meshmode.discretization import Discretization + from meshmode.discretization.poly_element import \ + InterpolatoryQuadratureSimplexGroupFactory + from pytential.qbx import QBXLayerPotentialSource + pre_density_discr = Discretization( + cl_ctx, mesh, + InterpolatoryQuadratureSimplexGroupFactory(target_order)) + + from sumpy.expansion.level_to_order import SimpleExpansionOrderFinder + + refiner_extra_kwargs = {} + + if helmholtz_k != 0: + refiner_extra_kwargs["kernel_length_scale"] = 5 / abs(helmholtz_k) + + qbx, _ = QBXLayerPotentialSource( + pre_density_discr, 4*target_order, + qbx_order=qbx_order, + fmm_level_to_order=SimpleExpansionOrderFinder(fmm_tol), + fmm_backend="fmmlib", + _expansions_in_tree_have_extent=True, + _expansion_stick_out_factor=0.9, + _use_target_specific_qbx=False, + ).with_refinement(**refiner_extra_kwargs) + + density_discr = qbx.density_discr + + nodes = density_discr.nodes().with_queue(queue) + u_dev = clmath.sin(nodes[0]) + + if helmholtz_k == 0: + kernel = LaplaceKernel(3) + kernel_kwargs = {} + else: + kernel = HelmholtzKernel(3, allow_evanescent=True) + kernel_kwargs = {"k": sym.var("k")} + + u_sym = sym.var("u") + + if op == "S": + op = sym.S + elif op == "D": + op = sym.D + elif op == "Sp": + op = sym.Sp + else: + raise ValueError("unknown operator: '%s'" % op) + + expr = op(kernel, u_sym, qbx_forced_limit=-1, **kernel_kwargs) 
+ + bound_op = bind(qbx, expr) + pot_ref = bound_op(queue, u=u_dev, k=helmholtz_k).get() + + qbx = qbx.copy(_use_target_specific_qbx=True) + bound_op = bind(qbx, expr) + pot_tsqbx = bound_op(queue, u=u_dev, k=helmholtz_k).get() + + assert np.allclose(pot_tsqbx, pot_ref, atol=1e-13, rtol=1e-13) + + +# You can test individual routines by typing +# $ python test_target_specific_qbx.py 'test_routine()' + +if __name__ == "__main__": + import sys + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: fdm=marker
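
For orientation, the evaluation path added by this patch is switched on through the
`_use_target_specific_qbx` flag of `QBXLayerPotentialSource`; the operator expressions
themselves do not change. The following driver sketch mirrors
`test/test_target_specific_qbx.py` above and pairs the flag with `fmm_backend="fmmlib"`
as that test does; the mesh resolution, orders, and FMM tolerance are illustrative
choices, not values mandated by the patch:

    import pyopencl as cl
    import pyopencl.clmath as clmath

    from meshmode.mesh.generation import generate_icosphere
    from meshmode.discretization import Discretization
    from meshmode.discretization.poly_element import \
            InterpolatoryQuadratureSimplexGroupFactory

    from pytential import bind, sym
    from pytential.qbx import QBXLayerPotentialSource
    from sumpy.expansion.level_to_order import SimpleExpansionOrderFinder
    from sumpy.kernel import LaplaceKernel

    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)

    # Unit sphere discretization; target_order = 4 is an illustrative choice.
    target_order = 4
    pre_density_discr = Discretization(
            cl_ctx, generate_icosphere(1, target_order),
            InterpolatoryQuadratureSimplexGroupFactory(target_order))

    # _use_target_specific_qbx=True selects the target-specific evaluation
    # path; fmmlib is the FMM backend used in the test above.
    qbx, _ = QBXLayerPotentialSource(
            pre_density_discr, 4*target_order, qbx_order=5,
            fmm_level_to_order=SimpleExpansionOrderFinder(1e-3),
            fmm_backend="fmmlib",
            _use_target_specific_qbx=True,
            ).with_refinement()

    density_discr = qbx.density_discr
    u_dev = clmath.sin(density_discr.nodes().with_queue(queue)[0])

    # Interior one-sided limit of the Laplace single-layer potential.
    pot = bind(qbx, sym.S(LaplaceKernel(3), sym.var("u"),
                          qbx_forced_limit=-1))(queue, u=u_dev).get()

With the flag set to `False`, the same bound operator goes through the existing QBX
evaluation path; the test uses that to produce the reference potential it compares
against.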