From 0abf04e52a8f980544c8d14f89e5a2f1409134d1 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 26 Apr 2018 20:22:05 -0500 Subject: [PATCH 1/6] Add tsqbx module. --- pytential/qbx/target_specific.pyx | 138 ++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 pytential/qbx/target_specific.pyx diff --git a/pytential/qbx/target_specific.pyx b/pytential/qbx/target_specific.pyx new file mode 100644 index 00000000..184ec4d5 --- /dev/null +++ b/pytential/qbx/target_specific.pyx @@ -0,0 +1,138 @@ +#!python +#cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True + +import numpy as np +import cython +import cython.parallel + +from libc.math cimport sqrt +cimport openmp + + +cdef double legendre(double x, int n, double[] coeffs) nogil: + """Evaluate the Legendre series of order n at x. + + Taken from SciPy. + """ + cdef: + double c0, c1, tmp + int nd, i + + if n == 0: + c0 = coeffs[0] + c1 = 0 + elif n == 1: + c0 = coeffs[0] + c1 = coeffs[1] + else: + nd = n + 1 + c0 = coeffs[n - 1] + c1 = coeffs[n] + + for i in range(3, n + 2): + tmp = c0 + nd = nd - 1 + c0 = coeffs[1+n-i] - (c1*(nd - 1))/nd + c1 = tmp + (c1*x*(2*nd - 1))/n + return c0 + c1*x + + +cdef double dist(double[3] a, double[3] b) nogil: + return sqrt( + (a[0] - b[0]) * (a[0] - b[0]) + + (a[1] - b[1]) * (a[1] - b[1]) + + (a[2] - b[2]) * (a[2] - b[2])) + + +cdef double tsqbx_from_source( + double[3] source, + double[3] center, + double[3] target, + int order, + double[] tmp) nogil: + cdef: + int i + double r, sc_d, tc_d + double cos_angle + + tc_d = dist(target, center) + sc_d = dist(source, center) + r = sc_d / tc_d + tmp[0] = 1 / tc_d + + for i in range(1, order + 1): + tmp[i] = tmp[i - 1] * r + + cos_angle = (( + (target[0] - center[0]) * (source[0] - center[0]) + + (target[1] - center[1]) * (source[1] - center[1]) + + (target[2] - center[2]) * (source[2] - center[2])) + / (tc_d * sc_d)) + + return legendre(cos_angle, order, tmp) + + +def form_target_specific_qbx_contributions( + double[:,:] sources, + double[:,:] targets, + double[:,:] global_qbx_centers, + int[:] target_to_center, + int order, + int[:] qbx_center_to_target_box, + int[:] source_box_starts, int[:] source_box_lists, + int[:] box_source_starts, int[:] box_source_counts_nonchild, + double[:] source_weights, + double[:] pot): + + cdef: + int itgt, icenter + int tgt_box, src_ibox + int isrc_box, isrc_box_start, isrc_box_end + int isrc, isrc_start, isrc_end + int i, tid + double result + double[:,:] source, center, target, tmp + + # Yucky thread-local hack + maxthreads = openmp.omp_get_max_threads() + + source = np.zeros((maxthreads, 3)) + target = np.zeros((maxthreads, 3)) + center = np.zeros((maxthreads, 3)) + tmp = np.zeros((maxthreads, 256)) + + # TODO: Check if order > 256 + + for itgt in cython.parallel.prange(0, targets.shape[1], nogil=True, + schedule="dynamic", chunksize=10): + icenter = target_to_center[itgt] + if icenter == -1: + continue + + result = 0 + + tgt_box = qbx_center_to_target_box[icenter] + + tid = cython.parallel.threadid() + + for i in range(3): + target[tid, i] = targets[itgt, i] + center[tid, i] = global_qbx_centers[icenter, i] + + isrc_box_start = source_box_starts[tgt_box] + isrc_box_end = source_box_starts[tgt_box + 1] + + for isrc_box in range(isrc_box_start, isrc_box_end): + src_ibox = source_box_lists[isrc_box] + isrc_start = box_source_starts[src_ibox] + isrc_end = isrc_start + box_source_counts_nonchild[src_ibox] + + for isrc in range(isrc_start, isrc_end): + for i in range(3): + source[tid, i] = sources[i, isrc] + + result = result + source_weights[isrc] * ( + tsqbx_from_source(&source[tid, 0], ¢er[tid, 0], + &target[tid, 0], order, &tmp[tid, 0])) + + pot[itgt] = pot[itgt] + result -- GitLab From fe82084331cbbc92b27c60d80584ace981da11d7 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 26 Apr 2018 20:27:49 -0500 Subject: [PATCH 2/6] Add extension to setup.py --- setup.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/setup.py b/setup.py index 84438494..89dc02d4 100644 --- a/setup.py +++ b/setup.py @@ -3,6 +3,8 @@ import os from setuptools import setup, find_packages +from setuptools.extension import Extension +from Cython.Build import cythonize # {{{ capture git revision at install time @@ -54,6 +56,16 @@ write_git_revision("pytential") # }}} +ext_modules = [ + Extension( + "pytential.qbx.target_specific", + ["pytential/qbx/target_specific.pyx"], + extra_compile_args=['-fopenmp'], + extra_link_args=['-fopenmp'] + ) +] + + version_dict = {} init_filename = "pytential/version.py" os.environ["AKPYTHON_EXEC_FROM_WITHIN_WITHIN_SETUP_PY"] = "1" @@ -91,6 +103,8 @@ setup(name="pytential", packages=find_packages(), + ext_modules = cythonize(ext_modules), + install_requires=[ "pytest>=2.3", # FIXME leave out for now -- GitLab From ad70bb9d714d0fa4a0077c45c3dc5ca503b840bb Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 26 Apr 2018 22:41:05 -0500 Subject: [PATCH 3/6] TSQBX working for SLP --- pytential/qbx/__init__.py | 8 ++- pytential/qbx/fmm.py | 20 ++++++- pytential/qbx/fmmlib.py | 53 ++++++++++++++++- pytential/qbx/target_specific.pyx | 99 +++++++++++++++++-------------- setup.py | 2 +- 5 files changed, 128 insertions(+), 54 deletions(-) diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index 4e03ceca..81900a9b 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -83,6 +83,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): _from_sep_smaller_crit=None, _from_sep_smaller_min_nsources_cumul=None, _tree_kind="adaptive", + _use_tsqbx_list1=False, geometry_data_inspector=None, fmm_backend="sumpy", target_stick_out_factor=_not_provided): @@ -179,6 +180,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): self._from_sep_smaller_min_nsources_cumul = \ _from_sep_smaller_min_nsources_cumul self._tree_kind = _tree_kind + self._use_tsqbx_list1 = _use_tsqbx_list1 self.geometry_data_inspector = geometry_data_inspector # /!\ *All* parameters set here must also be set by copy() below, @@ -198,6 +200,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): _expansions_in_tree_have_extent=_not_provided, _expansion_stick_out_factor=_not_provided, _tree_kind=None, + _use_tsqbx_list1=_not_provided, geometry_data_inspector=None, debug=_not_provided, @@ -277,6 +280,8 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): _from_sep_smaller_min_nsources_cumul=( self._from_sep_smaller_min_nsources_cumul), _tree_kind=_tree_kind or self._tree_kind, + _use_tsqbx_list1=_use_tsqbx_list1 if _use_tsqbx_list1 is not _not_provided + else self._use_tsqbx_list1, geometry_data_inspector=( geometry_data_inspector or self.geometry_data_inspector), fmm_backend=self.fmm_backend, @@ -694,7 +699,8 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): self.qbx_order, self.fmm_level_to_order, source_extra_kwargs=source_extra_kwargs, - kernel_extra_kwargs=kernel_extra_kwargs) + kernel_extra_kwargs=kernel_extra_kwargs, + _use_target_specific_list1=self._use_tsqbx_list1) from pytential.qbx.geometry import target_state if (geo_data.user_target_to_center().with_queue(queue) diff --git a/pytential/qbx/fmm.py b/pytential/qbx/fmm.py index d3622e59..9c39c9b4 100644 --- a/pytential/qbx/fmm.py +++ b/pytential/qbx/fmm.py @@ -90,12 +90,14 @@ class QBXSumpyExpansionWranglerCodeContainer(SumpyExpansionWranglerCodeContainer def get_wrangler(self, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs={}, - kernel_extra_kwargs=None): + kernel_extra_kwargs=None, + _use_target_specific_list1=False): return QBXExpansionWrangler(self, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs, - kernel_extra_kwargs) + kernel_extra_kwargs, + _use_target_specific_list1) class QBXExpansionWrangler(SumpyExpansionWrangler): @@ -119,7 +121,11 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), def __init__(self, code_container, queue, geo_data, dtype, qbx_order, fmm_level_to_order, - source_extra_kwargs, kernel_extra_kwargs): + source_extra_kwargs, kernel_extra_kwargs, + _use_target_specific_list1=False): + if _use_target_specific_list1: + raise NotImplementedError("Cannot use TSQBX with sumpy yet") + SumpyExpansionWrangler.__init__(self, code_container, queue, geo_data.tree(), dtype, fmm_level_to_order, source_extra_kwargs, kernel_extra_kwargs) @@ -358,6 +364,11 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), return pot + @log_process(logger) + def eval_target_specific_global_qbx_locals(self, src_weights): + # Not implemented + pass + # }}} # }}} @@ -490,6 +501,9 @@ def drive_fmm(expansion_wrangler, src_weights): qbx_potentials = wrangler.eval_qbx_expansions( qbx_expansions) + qbx_potentials = qbx_potentials + \ + wrangler.eval_target_specific_global_qbx_locals(src_weights) + # }}} # {{{ reorder potentials diff --git a/pytential/qbx/fmmlib.py b/pytential/qbx/fmmlib.py index 2b6cbf88..f19d23e6 100644 --- a/pytential/qbx/fmmlib.py +++ b/pytential/qbx/fmmlib.py @@ -28,6 +28,7 @@ import pyopencl as cl # noqa import pyopencl.array # noqa: F401 from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler from sumpy.kernel import LaplaceKernel, HelmholtzKernel +import pytential.qbx.target_specific as target_specific from pytools import log_process @@ -54,12 +55,14 @@ class QBXFMMLibExpansionWranglerCodeContainer(object): def get_wrangler(self, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs={}, - kernel_extra_kwargs=None): + kernel_extra_kwargs=None, + _use_tsqbx_list1=False): return QBXFMMLibExpansionWrangler(self, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs, - kernel_extra_kwargs) + kernel_extra_kwargs, + _use_tsqbx_list1) # }}} @@ -128,10 +131,12 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): def __init__(self, code, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs, - kernel_extra_kwargs): + kernel_extra_kwargs, + _use_target_specific_list1=False): self.code = code self.queue = queue + self._use_target_specific_list1 = _use_target_specific_list1 # FMMLib is CPU-only. This wrapper gets the geometry out of # OpenCL-land. @@ -275,6 +280,13 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): raise ValueError("element '%s' of outputs array not " "understood" % out) + @memoize_method + def _get_single_centers_array(self): + return np.array([ + self.geo_data.centers()[idim] + for idim in range(self.dim) + ], order="F") + # }}} # {{{ override target lists to only hit non-QBX targets @@ -302,6 +314,9 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): @log_process(logger) def form_global_qbx_locals(self, src_weights): + if self._use_target_specific_list1: + return self.qbx_local_expansion_zeros() + geo_data = self.geo_data trav = geo_data.traversal() @@ -557,6 +572,38 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): return output + @log_process(logger) + def eval_target_specific_global_qbx_locals(self, src_weights): + if not self._use_target_specific_list1: + return self.full_output_zeros() + + pot = self.full_output_zeros() + geo_data = self.geo_data + trav = geo_data.traversal() + + ctt = geo_data.center_to_tree_targets() + + # TODO: assert this is the Laplace single or double layer kernel + + for output in pot: + target_specific.eval_target_specific_global_qbx_locals( + order=self.qbx_order, + sources=self._get_single_sources_array(), + targets=geo_data.all_targets(), + centers=self._get_single_centers_array(), + global_qbx_centers=geo_data.global_qbx_centers(), + qbx_center_to_target_box=geo_data.qbx_center_to_target_box(), + center_to_target_starts=ctt.starts, + center_to_target_lists=ctt.lists, + source_box_starts=trav.neighbor_source_boxes_starts, + source_box_lists=trav.neighbor_source_boxes_lists, + box_source_starts=self.tree.box_source_starts, + box_source_counts_nonchild=self.tree.box_source_counts_nonchild, + src_weights=src_weights, + pot=output) + + return pot + def finalize_potentials(self, potential): potential = super(QBXFMMLibExpansionWrangler, self).finalize_potentials( potential) diff --git a/pytential/qbx/target_specific.pyx b/pytential/qbx/target_specific.pyx index 184ec4d5..c668703a 100644 --- a/pytential/qbx/target_specific.pyx +++ b/pytential/qbx/target_specific.pyx @@ -6,6 +6,8 @@ import cython import cython.parallel from libc.math cimport sqrt +from libc.stdio cimport printf + cimport openmp @@ -17,7 +19,7 @@ cdef double legendre(double x, int n, double[] coeffs) nogil: cdef: double c0, c1, tmp int nd, i - + if n == 0: c0 = coeffs[0] c1 = 0 @@ -28,12 +30,12 @@ cdef double legendre(double x, int n, double[] coeffs) nogil: nd = n + 1 c0 = coeffs[n - 1] c1 = coeffs[n] - + for i in range(3, n + 2): tmp = c0 nd = nd - 1 c0 = coeffs[1+n-i] - (c1*(nd - 1))/nd - c1 = tmp + (c1*x*(2*nd - 1))/n + c1 = tmp + (c1*x*(2*nd - 1))/nd return c0 + c1*x @@ -57,8 +59,8 @@ cdef double tsqbx_from_source( tc_d = dist(target, center) sc_d = dist(source, center) - r = sc_d / tc_d - tmp[0] = 1 / tc_d + r = tc_d / sc_d + tmp[0] = 1 / sc_d for i in range(1, order + 1): tmp[i] = tmp[i - 1] * r @@ -72,67 +74,72 @@ cdef double tsqbx_from_source( return legendre(cos_angle, order, tmp) -def form_target_specific_qbx_contributions( +def eval_target_specific_global_qbx_locals( + int order, double[:,:] sources, double[:,:] targets, - double[:,:] global_qbx_centers, - int[:] target_to_center, - int order, + double[:,:] centers, + int[:] global_qbx_centers, int[:] qbx_center_to_target_box, + int[:] center_to_target_starts, int[:] center_to_target_lists, int[:] source_box_starts, int[:] source_box_lists, int[:] box_source_starts, int[:] box_source_counts_nonchild, - double[:] source_weights, - double[:] pot): + double[:] src_weights, + double complex[:] pot): cdef: - int itgt, icenter + int tgt, ictr, ctr + int itgt, itgt_start, itgt_end int tgt_box, src_ibox int isrc_box, isrc_box_start, isrc_box_end int isrc, isrc_start, isrc_end int i, tid - double result + double result double[:,:] source, center, target, tmp # Yucky thread-local hack maxthreads = openmp.omp_get_max_threads() - source = np.zeros((maxthreads, 3)) - target = np.zeros((maxthreads, 3)) - center = np.zeros((maxthreads, 3)) - tmp = np.zeros((maxthreads, 256)) + source = np.zeros((1 + maxthreads, 3)) + target = np.zeros((1 + maxthreads, 3)) + center = np.zeros((1 + maxthreads, 3)) + tmp = np.zeros((1 + maxthreads, 256)) # TODO: Check if order > 256 - for itgt in cython.parallel.prange(0, targets.shape[1], nogil=True, - schedule="dynamic", chunksize=10): - icenter = target_to_center[itgt] - if icenter == -1: - continue + for ictr in cython.parallel.prange(0, global_qbx_centers.shape[0], + nogil=True, schedule="dynamic", + chunksize=10): + ctr = global_qbx_centers[ictr] + itgt_start = center_to_target_starts[ctr] + itgt_end = center_to_target_starts[ctr + 1] + tgt_box = qbx_center_to_target_box[ctr] + tid = cython.parallel.threadid() - result = 0 + for i in range(3): + center[tid, i] = centers[i, ctr] - tgt_box = qbx_center_to_target_box[icenter] + for itgt in range(itgt_start, itgt_end): + result = 0 + tgt = center_to_target_lists[itgt] - tid = cython.parallel.threadid() + for i in range(3): + target[tid, i] = targets[i, tgt] - for i in range(3): - target[tid, i] = targets[itgt, i] - center[tid, i] = global_qbx_centers[icenter, i] - - isrc_box_start = source_box_starts[tgt_box] - isrc_box_end = source_box_starts[tgt_box + 1] - - for isrc_box in range(isrc_box_start, isrc_box_end): - src_ibox = source_box_lists[isrc_box] - isrc_start = box_source_starts[src_ibox] - isrc_end = isrc_start + box_source_counts_nonchild[src_ibox] - - for isrc in range(isrc_start, isrc_end): - for i in range(3): - source[tid, i] = sources[i, isrc] - - result = result + source_weights[isrc] * ( - tsqbx_from_source(&source[tid, 0], ¢er[tid, 0], - &target[tid, 0], order, &tmp[tid, 0])) - - pot[itgt] = pot[itgt] + result + isrc_box_start = source_box_starts[tgt_box] + isrc_box_end = source_box_starts[tgt_box + 1] + + for isrc_box in range(isrc_box_start, isrc_box_end): + src_ibox = source_box_lists[isrc_box] + isrc_start = box_source_starts[src_ibox] + isrc_end = isrc_start + box_source_counts_nonchild[src_ibox] + + for isrc in range(isrc_start, isrc_end): + for i in range(3): + source[tid, i] = sources[i, isrc] + + result = result + src_weights[isrc] * ( + tsqbx_from_source(&source[tid, 0], ¢er[tid, 0], + &target[tid, 0], order, &tmp[tid, 0])) + + pot[tgt] = pot[tgt] + result diff --git a/setup.py b/setup.py index 89dc02d4..a39b21bb 100644 --- a/setup.py +++ b/setup.py @@ -103,7 +103,7 @@ setup(name="pytential", packages=find_packages(), - ext_modules = cythonize(ext_modules), + ext_modules = cythonize(ext_modules) install_requires=[ "pytest>=2.3", -- GitLab From ff09893c559d340e12cc28e8627c8754eaa78d4b Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 26 Apr 2018 23:32:41 -0500 Subject: [PATCH 4/6] Checkpoint --- pytential/qbx/fmmlib.py | 4 +- pytential/qbx/target_specific.pyx | 114 ++++++++++++++++++++++-------- setup.py | 2 +- 3 files changed, 88 insertions(+), 32 deletions(-) diff --git a/pytential/qbx/fmmlib.py b/pytential/qbx/fmmlib.py index f19d23e6..ae323a24 100644 --- a/pytential/qbx/fmmlib.py +++ b/pytential/qbx/fmmlib.py @@ -56,13 +56,13 @@ class QBXFMMLibExpansionWranglerCodeContainer(object): qbx_order, fmm_level_to_order, source_extra_kwargs={}, kernel_extra_kwargs=None, - _use_tsqbx_list1=False): + _use_target_specific_list1=False): return QBXFMMLibExpansionWrangler(self, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs, kernel_extra_kwargs, - _use_tsqbx_list1) + _use_target_specific_list1) # }}} diff --git a/pytential/qbx/target_specific.pyx b/pytential/qbx/target_specific.pyx index c668703a..c349bb56 100644 --- a/pytential/qbx/target_specific.pyx +++ b/pytential/qbx/target_specific.pyx @@ -11,32 +11,47 @@ from libc.stdio cimport printf cimport openmp -cdef double legendre(double x, int n, double[] coeffs) nogil: - """Evaluate the Legendre series of order n at x. +cdef void legvals(double x, int n, double[] vals, double[] derivs) nogil: + """Compute the values of the Legendre polynomial up to order n at x. + Optionally, if derivs is non-NULL, compute the values of the derivative too. - Taken from SciPy. + Borrowed from fmmlib. """ cdef: - double c0, c1, tmp - int nd, i + double pj, derj, pjm2, pjm1, derjm2, derjm1 + int j + + pjm2 = 1 + pjm1 = x + + vals[0] = 1 + if derivs != NULL: + derivs[0] = 0 + derjm2 = 0 + derjm1 = 1 if n == 0: - c0 = coeffs[0] - c1 = 0 - elif n == 1: - c0 = coeffs[0] - c1 = coeffs[1] - else: - nd = n + 1 - c0 = coeffs[n - 1] - c1 = coeffs[n] - - for i in range(3, n + 2): - tmp = c0 - nd = nd - 1 - c0 = coeffs[1+n-i] - (c1*(nd - 1))/nd - c1 = tmp + (c1*x*(2*nd - 1))/nd - return c0 + c1*x + return + + vals[1] = x + if derivs != NULL: + derivs[1] = 1 + + if n == 1: + return + + for j in range(2, n + 1): + pj = ( (2*j-1)*x*pjm1-(j-1)*pjm2 ) / j + vals[j] = pj + pjm2 = pjm1 + pjm1 = pj + + if derivs != NULL: + derj = (2*j-1)*(pjm1+x*derjm1)-(j-1)*derjm2 + derj = derj / j + derivs[j] = derj + derjm2 = derjm1 + derjm1 = derj cdef double dist(double[3] a, double[3] b) nogil: @@ -46,24 +61,56 @@ cdef double dist(double[3] a, double[3] b) nogil: (a[2] - b[2]) * (a[2] - b[2])) -cdef double tsqbx_from_source( +""" +cdef void tsqbx_grad_from_source( double[3] source, double[3] center, double[3] target, + double[3] grad, int order, double[] tmp) nogil: cdef: int i - double r, sc_d, tc_d - double cos_angle + double result, r, sc_d, tc_d, cos_angle, alpha + double *derivs + + derivs = &tmp[order + 1] tc_d = dist(target, center) sc_d = dist(source, center) - r = tc_d / sc_d - tmp[0] = 1 / sc_d - for i in range(1, order + 1): - tmp[i] = tmp[i - 1] * r + alpha = ( + (target[0] - center[0]) * (source[0] - center[0]) + + (target[1] - center[1]) * (source[1] - center[1]) + + (target[2] - center[2]) * (source[2] - center[2]) + + cos_angle = alpha / (tc_d * sc_d) + + legvals(cos_angle, order, tmp, derivs) + + result = 0 + r = 1 / sc_d + + for i in range(0, order + 1): + result = result + tmp[i] * r + r = r * (tc_d / sc_d) + + return result +""" + + +cdef double tsqbx_from_source( + double[3] source, + double[3] center, + double[3] target, + int order, + double[] tmp) nogil: + cdef: + int i + double result, r, sc_d, tc_d, cos_angle + + tc_d = dist(target, center) + sc_d = dist(source, center) cos_angle = (( (target[0] - center[0]) * (source[0] - center[0]) + @@ -71,7 +118,16 @@ cdef double tsqbx_from_source( (target[2] - center[2]) * (source[2] - center[2])) / (tc_d * sc_d)) - return legendre(cos_angle, order, tmp) + legvals(cos_angle, order, tmp, NULL) + + result = 0 + r = 1 / sc_d + + for i in range(0, order + 1): + result = result + tmp[i] * r + r = r * (tc_d / sc_d) + + return result def eval_target_specific_global_qbx_locals( diff --git a/setup.py b/setup.py index a39b21bb..89dc02d4 100644 --- a/setup.py +++ b/setup.py @@ -103,7 +103,7 @@ setup(name="pytential", packages=find_packages(), - ext_modules = cythonize(ext_modules) + ext_modules = cythonize(ext_modules), install_requires=[ "pytest>=2.3", -- GitLab From c62e9a3b81e7add42e6cdf829564488b8661ec13 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 27 Apr 2018 01:40:51 -0500 Subject: [PATCH 5/6] Get DLP working. --- pytential/qbx/fmmlib.py | 3 +- pytential/qbx/target_specific.pyx | 92 ++++++++++++++-------- test/test_target_specific_qbx.py | 123 ++++++++++++++++++++++++++++++ 3 files changed, 185 insertions(+), 33 deletions(-) create mode 100644 test/test_target_specific_qbx.py diff --git a/pytential/qbx/fmmlib.py b/pytential/qbx/fmmlib.py index ae323a24..43fcbb13 100644 --- a/pytential/qbx/fmmlib.py +++ b/pytential/qbx/fmmlib.py @@ -599,7 +599,8 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): source_box_lists=trav.neighbor_source_boxes_lists, box_source_starts=self.tree.box_source_starts, box_source_counts_nonchild=self.tree.box_source_counts_nonchild, - src_weights=src_weights, + dipstr=src_weights, + dipvec=self.dipole_vec, pot=output) return pot diff --git a/pytential/qbx/target_specific.pyx b/pytential/qbx/target_specific.pyx index c349bb56..cbb5d73b 100644 --- a/pytential/qbx/target_specific.pyx +++ b/pytential/qbx/target_specific.pyx @@ -43,8 +43,6 @@ cdef void legvals(double x, int n, double[] vals, double[] derivs) nogil: for j in range(2, n + 1): pj = ( (2*j-1)*x*pjm1-(j-1)*pjm2 ) / j vals[j] = pj - pjm2 = pjm1 - pjm1 = pj if derivs != NULL: derj = (2*j-1)*(pjm1+x*derjm1)-(j-1)*derjm2 @@ -53,6 +51,9 @@ cdef void legvals(double x, int n, double[] vals, double[] derivs) nogil: derjm2 = derjm1 derjm1 = derj + pjm2 = pjm1 + pjm1 = pj + cdef double dist(double[3] a, double[3] b) nogil: return sqrt( @@ -61,20 +62,24 @@ cdef double dist(double[3] a, double[3] b) nogil: (a[2] - b[2]) * (a[2] - b[2])) -""" cdef void tsqbx_grad_from_source( double[3] source, double[3] center, double[3] target, - double[3] grad, - int order, - double[] tmp) nogil: + double[3] grad, + int order) nogil: cdef: - int i - double result, r, sc_d, tc_d, cos_angle, alpha - double *derivs - - derivs = &tmp[order + 1] + int i, j + double result, sc_d, tc_d, cos_angle, alpha, R + double[128] tmp + double[128] derivs + double[3] cms + double[3] tmc + + for j in range(3): + cms[j] = center[j] - source[j] + tmc[j] = target[j] - center[j] + grad[j] = 0 tc_d = dist(target, center) sc_d = dist(source, center) @@ -82,32 +87,37 @@ cdef void tsqbx_grad_from_source( alpha = ( (target[0] - center[0]) * (source[0] - center[0]) + (target[1] - center[1]) * (source[1] - center[1]) + - (target[2] - center[2]) * (source[2] - center[2]) + (target[2] - center[2]) * (source[2] - center[2])) cos_angle = alpha / (tc_d * sc_d) legvals(cos_angle, order, tmp, derivs) - result = 0 - r = 1 / sc_d + R = 1 / sc_d for i in range(0, order + 1): - result = result + tmp[i] * r - r = r * (tc_d / sc_d) + # Invariant: R = (t_cd ** i / sc_d ** (i + 1)) + for j in range(3): + grad[j] += (i + 1) * cms[j] / (sc_d ** 2) * R * tmp[i] + for j in range(3): + # Siegel and Tornberg has a sign flip here :( + grad[j] += ( + tmc[j] / (tc_d * sc_d) + + alpha * cms[j] / (tc_d * sc_d ** 3)) * R * derivs[i] + R *= (tc_d / sc_d) - return result -""" + return cdef double tsqbx_from_source( double[3] source, double[3] center, double[3] target, - int order, - double[] tmp) nogil: + int order) nogil: cdef: int i double result, r, sc_d, tc_d, cos_angle + double tmp[128] tc_d = dist(target, center) sc_d = dist(source, center) @@ -124,8 +134,8 @@ cdef double tsqbx_from_source( r = 1 / sc_d for i in range(0, order + 1): - result = result + tmp[i] * r - r = r * (tc_d / sc_d) + result += tmp[i] * r + r *= (tc_d / sc_d) return result @@ -140,7 +150,8 @@ def eval_target_specific_global_qbx_locals( int[:] center_to_target_starts, int[:] center_to_target_lists, int[:] source_box_starts, int[:] source_box_lists, int[:] box_source_starts, int[:] box_source_counts_nonchild, - double[:] src_weights, + double[:] dipstr, + double[:,:] dipvec, double complex[:] pot): cdef: @@ -151,15 +162,22 @@ def eval_target_specific_global_qbx_locals( int isrc, isrc_start, isrc_end int i, tid double result - double[:,:] source, center, target, tmp + double[:,:] source, center, target, grad + int slp, dlp + + slp = (dipstr is not None) and (dipvec is None) + dlp = (dipstr is not None) and (dipvec is not None) + + if not (slp or dlp): + raise ValueError("should specify exactly one of src_weights or dipvec") - # Yucky thread-local hack + # Hack to obtain thread-local storage maxthreads = openmp.omp_get_max_threads() - source = np.zeros((1 + maxthreads, 3)) - target = np.zeros((1 + maxthreads, 3)) - center = np.zeros((1 + maxthreads, 3)) - tmp = np.zeros((1 + maxthreads, 256)) + source = np.zeros((maxthreads, 3)) + target = np.zeros((maxthreads, 3)) + center = np.zeros((maxthreads, 3)) + grad = np.zeros((maxthreads, 3)) # TODO: Check if order > 256 @@ -194,8 +212,18 @@ def eval_target_specific_global_qbx_locals( for i in range(3): source[tid, i] = sources[i, isrc] - result = result + src_weights[isrc] * ( - tsqbx_from_source(&source[tid, 0], ¢er[tid, 0], - &target[tid, 0], order, &tmp[tid, 0])) + if slp: + # Don't replace with +=, since that makes Cython think + # it is a reduction. + result = result + dipstr[isrc] * ( + tsqbx_from_source(&source[tid, 0], ¢er[tid, 0], + &target[tid, 0], order)) + elif dlp: + tsqbx_grad_from_source(&source[tid, 0], ¢er[tid, 0], + &target[tid, 0], &grad[tid, 0], order) + result = result + dipstr[isrc] * ( + grad[tid, 0] * dipvec[0, isrc] + + grad[tid, 1] * dipvec[1, isrc] + + grad[tid, 2] * dipvec[2, isrc]) pot[tgt] = pot[tgt] + result diff --git a/test/test_target_specific_qbx.py b/test/test_target_specific_qbx.py new file mode 100644 index 00000000..60a801d4 --- /dev/null +++ b/test/test_target_specific_qbx.py @@ -0,0 +1,123 @@ +from __future__ import division, absolute_import, print_function + +__copyright__ = "Copyright (C) 2013-2017 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import numpy as np +import numpy.linalg as la +import pyopencl as cl +import pyopencl.clmath # noqa +import pytest +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + +from functools import partial +from meshmode.mesh.generation import ( # noqa + ellipse, cloverleaf, starfish, drop, n_gon, qbx_peanut, WobblyCircle, + NArmedStarfish, + make_curve_mesh) +# from sumpy.visualization import FieldPlotter +from pytential import bind, sym, norm +from sumpy.kernel import LaplaceKernel, HelmholtzKernel + +import logging +logger = logging.getLogger(__name__) + +try: + import matplotlib.pyplot as pt +except ImportError: + pass + + +@pytest.mark.parametrize("op", ["S", "D"]) +def test_target_specific_qbx(ctx_getter, op): + logging.basicConfig(level=logging.INFO) + + cl_ctx = ctx_getter() + queue = cl.CommandQueue(cl_ctx) + + target_order = 4 + + from meshmode.mesh.generation import generate_icosphere + mesh = generate_icosphere(1, target_order) + + from meshmode.discretization import Discretization + from meshmode.discretization.poly_element import \ + InterpolatoryQuadratureSimplexGroupFactory + from pytential.qbx import QBXLayerPotentialSource + pre_density_discr = Discretization( + cl_ctx, mesh, + InterpolatoryQuadratureSimplexGroupFactory(target_order)) + + qbx, _ = QBXLayerPotentialSource( + pre_density_discr, 4*target_order, + qbx_order=5, + fmm_order=10, + fmm_backend="fmmlib", + _expansions_in_tree_have_extent=True, + _expansion_stick_out_factor=0.9, + ).with_refinement() + + density_discr = qbx.density_discr + + nodes_host = density_discr.nodes().get(queue) + center = np.array([3, 1, 2]) + diff = nodes_host - center[:, np.newaxis] + + dist_squared = np.sum(diff**2, axis=0) + dist = np.sqrt(dist_squared) + u = 1/dist + + u_dev = cl.array.to_device(queue, u) + + kernel = LaplaceKernel(3) + u_sym = sym.var("u") + + if op == "S": + op = sym.S + elif op == "D": + op = sym.D + expr = op(kernel, u_sym, qbx_forced_limit=-1) + + bound_op = bind(qbx, expr) + slp_ref = bound_op(queue, u=u_dev) + + qbx = qbx.copy(_use_tsqbx_list1=True) + bound_op = bind(qbx, expr) + slp_tsqbx = bound_op(queue, u=u_dev) + + assert (np.max(np.abs(slp_ref.get() - slp_tsqbx.get()))) < 1e-13 + + +# You can test individual routines by typing +# $ python test_layer_pot_identity.py 'test_routine()' + +if __name__ == "__main__": + import sys + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: fdm=marker -- GitLab From 29356b4154d5be017a24c899e8ee341259d9d1db Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 27 Apr 2018 18:53:18 -0500 Subject: [PATCH 6/6] [ci skip] Performance tweaking. --- pytential/qbx/target_specific.pyx | 19 +++++++++++-------- setup.py | 4 ++-- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/pytential/qbx/target_specific.pyx b/pytential/qbx/target_specific.pyx index cbb5d73b..7cde3a57 100644 --- a/pytential/qbx/target_specific.pyx +++ b/pytential/qbx/target_specific.pyx @@ -98,12 +98,12 @@ cdef void tsqbx_grad_from_source( for i in range(0, order + 1): # Invariant: R = (t_cd ** i / sc_d ** (i + 1)) for j in range(3): - grad[j] += (i + 1) * cms[j] / (sc_d ** 2) * R * tmp[i] + grad[j] += (i + 1) * cms[j] / (sc_d * sc_d) * R * tmp[i] for j in range(3): # Siegel and Tornberg has a sign flip here :( grad[j] += ( tmc[j] / (tc_d * sc_d) + - alpha * cms[j] / (tc_d * sc_d ** 3)) * R * derivs[i] + alpha * cms[j] / (tc_d * sc_d * sc_d * sc_d)) * R * derivs[i] R *= (tc_d / sc_d) return @@ -168,22 +168,25 @@ def eval_target_specific_global_qbx_locals( slp = (dipstr is not None) and (dipvec is None) dlp = (dipstr is not None) and (dipvec is not None) + print("Hi from Cython") + if not (slp or dlp): raise ValueError("should specify exactly one of src_weights or dipvec") # Hack to obtain thread-local storage maxthreads = openmp.omp_get_max_threads() - source = np.zeros((maxthreads, 3)) - target = np.zeros((maxthreads, 3)) - center = np.zeros((maxthreads, 3)) - grad = np.zeros((maxthreads, 3)) + # Prevent false sharing by over-allocating the buffers + source = np.zeros((maxthreads, 65)) + target = np.zeros((maxthreads, 65)) + center = np.zeros((maxthreads, 65)) + grad = np.zeros((maxthreads, 65)) # TODO: Check if order > 256 for ictr in cython.parallel.prange(0, global_qbx_centers.shape[0], - nogil=True, schedule="dynamic", - chunksize=10): + nogil=True, schedule="static", + chunksize=128): ctr = global_qbx_centers[ictr] itgt_start = center_to_target_starts[ctr] itgt_end = center_to_target_starts[ctr + 1] diff --git a/setup.py b/setup.py index 89dc02d4..59414845 100644 --- a/setup.py +++ b/setup.py @@ -60,8 +60,8 @@ ext_modules = [ Extension( "pytential.qbx.target_specific", ["pytential/qbx/target_specific.pyx"], - extra_compile_args=['-fopenmp'], - extra_link_args=['-fopenmp'] + extra_compile_args=["-fopenmp"], + extra_link_args=["-fopenmp"] ) ] -- GitLab