diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index 4e03ceca60980f5bf0fccc01df3e9f01df127b5f..81900a9b0e4b58246fdcccdf224c9c4d8a412df1 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -83,6 +83,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): _from_sep_smaller_crit=None, _from_sep_smaller_min_nsources_cumul=None, _tree_kind="adaptive", + _use_tsqbx_list1=False, geometry_data_inspector=None, fmm_backend="sumpy", target_stick_out_factor=_not_provided): @@ -179,6 +180,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): self._from_sep_smaller_min_nsources_cumul = \ _from_sep_smaller_min_nsources_cumul self._tree_kind = _tree_kind + self._use_tsqbx_list1 = _use_tsqbx_list1 self.geometry_data_inspector = geometry_data_inspector # /!\ *All* parameters set here must also be set by copy() below, @@ -198,6 +200,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): _expansions_in_tree_have_extent=_not_provided, _expansion_stick_out_factor=_not_provided, _tree_kind=None, + _use_tsqbx_list1=_not_provided, geometry_data_inspector=None, debug=_not_provided, @@ -277,6 +280,8 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): _from_sep_smaller_min_nsources_cumul=( self._from_sep_smaller_min_nsources_cumul), _tree_kind=_tree_kind or self._tree_kind, + _use_tsqbx_list1=_use_tsqbx_list1 if _use_tsqbx_list1 is not _not_provided + else self._use_tsqbx_list1, geometry_data_inspector=( geometry_data_inspector or self.geometry_data_inspector), fmm_backend=self.fmm_backend, @@ -694,7 +699,8 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): self.qbx_order, self.fmm_level_to_order, source_extra_kwargs=source_extra_kwargs, - kernel_extra_kwargs=kernel_extra_kwargs) + kernel_extra_kwargs=kernel_extra_kwargs, + _use_target_specific_list1=self._use_tsqbx_list1) from pytential.qbx.geometry import target_state if (geo_data.user_target_to_center().with_queue(queue) diff --git a/pytential/qbx/fmm.py b/pytential/qbx/fmm.py index d3622e5923e2846acd02285303e8e76d7054ad02..9c39c9b4ff44dec7a1eaa1453071add1839607a1 100644 --- a/pytential/qbx/fmm.py +++ b/pytential/qbx/fmm.py @@ -90,12 +90,14 @@ class QBXSumpyExpansionWranglerCodeContainer(SumpyExpansionWranglerCodeContainer def get_wrangler(self, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs={}, - kernel_extra_kwargs=None): + kernel_extra_kwargs=None, + _use_target_specific_list1=False): return QBXExpansionWrangler(self, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs, - kernel_extra_kwargs) + kernel_extra_kwargs, + _use_target_specific_list1) class QBXExpansionWrangler(SumpyExpansionWrangler): @@ -119,7 +121,11 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), def __init__(self, code_container, queue, geo_data, dtype, qbx_order, fmm_level_to_order, - source_extra_kwargs, kernel_extra_kwargs): + source_extra_kwargs, kernel_extra_kwargs, + _use_target_specific_list1=False): + if _use_target_specific_list1: + raise NotImplementedError("Cannot use TSQBX with sumpy yet") + SumpyExpansionWrangler.__init__(self, code_container, queue, geo_data.tree(), dtype, fmm_level_to_order, source_extra_kwargs, kernel_extra_kwargs) @@ -358,6 +364,11 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), return pot + @log_process(logger) + def eval_target_specific_global_qbx_locals(self, src_weights): + # Not implemented + pass + # }}} # }}} @@ -490,6 +501,9 @@ def drive_fmm(expansion_wrangler, src_weights): qbx_potentials = wrangler.eval_qbx_expansions( qbx_expansions) + qbx_potentials = qbx_potentials + \ + wrangler.eval_target_specific_global_qbx_locals(src_weights) + # }}} # {{{ reorder potentials diff --git a/pytential/qbx/fmmlib.py b/pytential/qbx/fmmlib.py index 2b6cbf88ce53364e4b7efc0b1ad01529eaaea0c9..43fcbb13d44bc29776c13951ab6dc0a643eb0b69 100644 --- a/pytential/qbx/fmmlib.py +++ b/pytential/qbx/fmmlib.py @@ -28,6 +28,7 @@ import pyopencl as cl # noqa import pyopencl.array # noqa: F401 from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler from sumpy.kernel import LaplaceKernel, HelmholtzKernel +import pytential.qbx.target_specific as target_specific from pytools import log_process @@ -54,12 +55,14 @@ class QBXFMMLibExpansionWranglerCodeContainer(object): def get_wrangler(self, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs={}, - kernel_extra_kwargs=None): + kernel_extra_kwargs=None, + _use_target_specific_list1=False): return QBXFMMLibExpansionWrangler(self, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs, - kernel_extra_kwargs) + kernel_extra_kwargs, + _use_target_specific_list1) # }}} @@ -128,10 +131,12 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): def __init__(self, code, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs, - kernel_extra_kwargs): + kernel_extra_kwargs, + _use_target_specific_list1=False): self.code = code self.queue = queue + self._use_target_specific_list1 = _use_target_specific_list1 # FMMLib is CPU-only. This wrapper gets the geometry out of # OpenCL-land. @@ -275,6 +280,13 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): raise ValueError("element '%s' of outputs array not " "understood" % out) + @memoize_method + def _get_single_centers_array(self): + return np.array([ + self.geo_data.centers()[idim] + for idim in range(self.dim) + ], order="F") + # }}} # {{{ override target lists to only hit non-QBX targets @@ -302,6 +314,9 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): @log_process(logger) def form_global_qbx_locals(self, src_weights): + if self._use_target_specific_list1: + return self.qbx_local_expansion_zeros() + geo_data = self.geo_data trav = geo_data.traversal() @@ -557,6 +572,39 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): return output + @log_process(logger) + def eval_target_specific_global_qbx_locals(self, src_weights): + if not self._use_target_specific_list1: + return self.full_output_zeros() + + pot = self.full_output_zeros() + geo_data = self.geo_data + trav = geo_data.traversal() + + ctt = geo_data.center_to_tree_targets() + + # TODO: assert this is the Laplace single or double layer kernel + + for output in pot: + target_specific.eval_target_specific_global_qbx_locals( + order=self.qbx_order, + sources=self._get_single_sources_array(), + targets=geo_data.all_targets(), + centers=self._get_single_centers_array(), + global_qbx_centers=geo_data.global_qbx_centers(), + qbx_center_to_target_box=geo_data.qbx_center_to_target_box(), + center_to_target_starts=ctt.starts, + center_to_target_lists=ctt.lists, + source_box_starts=trav.neighbor_source_boxes_starts, + source_box_lists=trav.neighbor_source_boxes_lists, + box_source_starts=self.tree.box_source_starts, + box_source_counts_nonchild=self.tree.box_source_counts_nonchild, + dipstr=src_weights, + dipvec=self.dipole_vec, + pot=output) + + return pot + def finalize_potentials(self, potential): potential = super(QBXFMMLibExpansionWrangler, self).finalize_potentials( potential) diff --git a/pytential/qbx/target_specific.pyx b/pytential/qbx/target_specific.pyx new file mode 100644 index 0000000000000000000000000000000000000000..7cde3a57efd42606fa7f22d8e50e4f4afaa93ebe --- /dev/null +++ b/pytential/qbx/target_specific.pyx @@ -0,0 +1,232 @@ +#!python +#cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True + +import numpy as np +import cython +import cython.parallel + +from libc.math cimport sqrt +from libc.stdio cimport printf + +cimport openmp + + +cdef void legvals(double x, int n, double[] vals, double[] derivs) nogil: + """Compute the values of the Legendre polynomial up to order n at x. + Optionally, if derivs is non-NULL, compute the values of the derivative too. + + Borrowed from fmmlib. + """ + cdef: + double pj, derj, pjm2, pjm1, derjm2, derjm1 + int j + + pjm2 = 1 + pjm1 = x + + vals[0] = 1 + if derivs != NULL: + derivs[0] = 0 + derjm2 = 0 + derjm1 = 1 + + if n == 0: + return + + vals[1] = x + if derivs != NULL: + derivs[1] = 1 + + if n == 1: + return + + for j in range(2, n + 1): + pj = ( (2*j-1)*x*pjm1-(j-1)*pjm2 ) / j + vals[j] = pj + + if derivs != NULL: + derj = (2*j-1)*(pjm1+x*derjm1)-(j-1)*derjm2 + derj = derj / j + derivs[j] = derj + derjm2 = derjm1 + derjm1 = derj + + pjm2 = pjm1 + pjm1 = pj + + +cdef double dist(double[3] a, double[3] b) nogil: + return sqrt( + (a[0] - b[0]) * (a[0] - b[0]) + + (a[1] - b[1]) * (a[1] - b[1]) + + (a[2] - b[2]) * (a[2] - b[2])) + + +cdef void tsqbx_grad_from_source( + double[3] source, + double[3] center, + double[3] target, + double[3] grad, + int order) nogil: + cdef: + int i, j + double result, sc_d, tc_d, cos_angle, alpha, R + double[128] tmp + double[128] derivs + double[3] cms + double[3] tmc + + for j in range(3): + cms[j] = center[j] - source[j] + tmc[j] = target[j] - center[j] + grad[j] = 0 + + tc_d = dist(target, center) + sc_d = dist(source, center) + + alpha = ( + (target[0] - center[0]) * (source[0] - center[0]) + + (target[1] - center[1]) * (source[1] - center[1]) + + (target[2] - center[2]) * (source[2] - center[2])) + + cos_angle = alpha / (tc_d * sc_d) + + legvals(cos_angle, order, tmp, derivs) + + R = 1 / sc_d + + for i in range(0, order + 1): + # Invariant: R = (t_cd ** i / sc_d ** (i + 1)) + for j in range(3): + grad[j] += (i + 1) * cms[j] / (sc_d * sc_d) * R * tmp[i] + for j in range(3): + # Siegel and Tornberg has a sign flip here :( + grad[j] += ( + tmc[j] / (tc_d * sc_d) + + alpha * cms[j] / (tc_d * sc_d * sc_d * sc_d)) * R * derivs[i] + R *= (tc_d / sc_d) + + return + + +cdef double tsqbx_from_source( + double[3] source, + double[3] center, + double[3] target, + int order) nogil: + cdef: + int i + double result, r, sc_d, tc_d, cos_angle + double tmp[128] + + tc_d = dist(target, center) + sc_d = dist(source, center) + + cos_angle = (( + (target[0] - center[0]) * (source[0] - center[0]) + + (target[1] - center[1]) * (source[1] - center[1]) + + (target[2] - center[2]) * (source[2] - center[2])) + / (tc_d * sc_d)) + + legvals(cos_angle, order, tmp, NULL) + + result = 0 + r = 1 / sc_d + + for i in range(0, order + 1): + result += tmp[i] * r + r *= (tc_d / sc_d) + + return result + + +def eval_target_specific_global_qbx_locals( + int order, + double[:,:] sources, + double[:,:] targets, + double[:,:] centers, + int[:] global_qbx_centers, + int[:] qbx_center_to_target_box, + int[:] center_to_target_starts, int[:] center_to_target_lists, + int[:] source_box_starts, int[:] source_box_lists, + int[:] box_source_starts, int[:] box_source_counts_nonchild, + double[:] dipstr, + double[:,:] dipvec, + double complex[:] pot): + + cdef: + int tgt, ictr, ctr + int itgt, itgt_start, itgt_end + int tgt_box, src_ibox + int isrc_box, isrc_box_start, isrc_box_end + int isrc, isrc_start, isrc_end + int i, tid + double result + double[:,:] source, center, target, grad + int slp, dlp + + slp = (dipstr is not None) and (dipvec is None) + dlp = (dipstr is not None) and (dipvec is not None) + + print("Hi from Cython") + + if not (slp or dlp): + raise ValueError("should specify exactly one of src_weights or dipvec") + + # Hack to obtain thread-local storage + maxthreads = openmp.omp_get_max_threads() + + # Prevent false sharing by over-allocating the buffers + source = np.zeros((maxthreads, 65)) + target = np.zeros((maxthreads, 65)) + center = np.zeros((maxthreads, 65)) + grad = np.zeros((maxthreads, 65)) + + # TODO: Check if order > 256 + + for ictr in cython.parallel.prange(0, global_qbx_centers.shape[0], + nogil=True, schedule="static", + chunksize=128): + ctr = global_qbx_centers[ictr] + itgt_start = center_to_target_starts[ctr] + itgt_end = center_to_target_starts[ctr + 1] + tgt_box = qbx_center_to_target_box[ctr] + tid = cython.parallel.threadid() + + for i in range(3): + center[tid, i] = centers[i, ctr] + + for itgt in range(itgt_start, itgt_end): + result = 0 + tgt = center_to_target_lists[itgt] + + for i in range(3): + target[tid, i] = targets[i, tgt] + + isrc_box_start = source_box_starts[tgt_box] + isrc_box_end = source_box_starts[tgt_box + 1] + + for isrc_box in range(isrc_box_start, isrc_box_end): + src_ibox = source_box_lists[isrc_box] + isrc_start = box_source_starts[src_ibox] + isrc_end = isrc_start + box_source_counts_nonchild[src_ibox] + + for isrc in range(isrc_start, isrc_end): + for i in range(3): + source[tid, i] = sources[i, isrc] + + if slp: + # Don't replace with +=, since that makes Cython think + # it is a reduction. + result = result + dipstr[isrc] * ( + tsqbx_from_source(&source[tid, 0], ¢er[tid, 0], + &target[tid, 0], order)) + elif dlp: + tsqbx_grad_from_source(&source[tid, 0], ¢er[tid, 0], + &target[tid, 0], &grad[tid, 0], order) + result = result + dipstr[isrc] * ( + grad[tid, 0] * dipvec[0, isrc] + + grad[tid, 1] * dipvec[1, isrc] + + grad[tid, 2] * dipvec[2, isrc]) + + pot[tgt] = pot[tgt] + result diff --git a/setup.py b/setup.py index 84438494a9bf49fd79df6dab0513e763833c3ae4..59414845e15bdcf96d1b31ed7835a2aff41709e2 100644 --- a/setup.py +++ b/setup.py @@ -3,6 +3,8 @@ import os from setuptools import setup, find_packages +from setuptools.extension import Extension +from Cython.Build import cythonize # {{{ capture git revision at install time @@ -54,6 +56,16 @@ write_git_revision("pytential") # }}} +ext_modules = [ + Extension( + "pytential.qbx.target_specific", + ["pytential/qbx/target_specific.pyx"], + extra_compile_args=["-fopenmp"], + extra_link_args=["-fopenmp"] + ) +] + + version_dict = {} init_filename = "pytential/version.py" os.environ["AKPYTHON_EXEC_FROM_WITHIN_WITHIN_SETUP_PY"] = "1" @@ -91,6 +103,8 @@ setup(name="pytential", packages=find_packages(), + ext_modules = cythonize(ext_modules), + install_requires=[ "pytest>=2.3", # FIXME leave out for now diff --git a/test/test_target_specific_qbx.py b/test/test_target_specific_qbx.py new file mode 100644 index 0000000000000000000000000000000000000000..60a801d447d31afff16985fc3a7bd10ae48c41d3 --- /dev/null +++ b/test/test_target_specific_qbx.py @@ -0,0 +1,123 @@ +from __future__ import division, absolute_import, print_function + +__copyright__ = "Copyright (C) 2013-2017 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import numpy as np +import numpy.linalg as la +import pyopencl as cl +import pyopencl.clmath # noqa +import pytest +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + +from functools import partial +from meshmode.mesh.generation import ( # noqa + ellipse, cloverleaf, starfish, drop, n_gon, qbx_peanut, WobblyCircle, + NArmedStarfish, + make_curve_mesh) +# from sumpy.visualization import FieldPlotter +from pytential import bind, sym, norm +from sumpy.kernel import LaplaceKernel, HelmholtzKernel + +import logging +logger = logging.getLogger(__name__) + +try: + import matplotlib.pyplot as pt +except ImportError: + pass + + +@pytest.mark.parametrize("op", ["S", "D"]) +def test_target_specific_qbx(ctx_getter, op): + logging.basicConfig(level=logging.INFO) + + cl_ctx = ctx_getter() + queue = cl.CommandQueue(cl_ctx) + + target_order = 4 + + from meshmode.mesh.generation import generate_icosphere + mesh = generate_icosphere(1, target_order) + + from meshmode.discretization import Discretization + from meshmode.discretization.poly_element import \ + InterpolatoryQuadratureSimplexGroupFactory + from pytential.qbx import QBXLayerPotentialSource + pre_density_discr = Discretization( + cl_ctx, mesh, + InterpolatoryQuadratureSimplexGroupFactory(target_order)) + + qbx, _ = QBXLayerPotentialSource( + pre_density_discr, 4*target_order, + qbx_order=5, + fmm_order=10, + fmm_backend="fmmlib", + _expansions_in_tree_have_extent=True, + _expansion_stick_out_factor=0.9, + ).with_refinement() + + density_discr = qbx.density_discr + + nodes_host = density_discr.nodes().get(queue) + center = np.array([3, 1, 2]) + diff = nodes_host - center[:, np.newaxis] + + dist_squared = np.sum(diff**2, axis=0) + dist = np.sqrt(dist_squared) + u = 1/dist + + u_dev = cl.array.to_device(queue, u) + + kernel = LaplaceKernel(3) + u_sym = sym.var("u") + + if op == "S": + op = sym.S + elif op == "D": + op = sym.D + expr = op(kernel, u_sym, qbx_forced_limit=-1) + + bound_op = bind(qbx, expr) + slp_ref = bound_op(queue, u=u_dev) + + qbx = qbx.copy(_use_tsqbx_list1=True) + bound_op = bind(qbx, expr) + slp_tsqbx = bound_op(queue, u=u_dev) + + assert (np.max(np.abs(slp_ref.get() - slp_tsqbx.get()))) < 1e-13 + + +# You can test individual routines by typing +# $ python test_layer_pot_identity.py 'test_routine()' + +if __name__ == "__main__": + import sys + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: fdm=marker