diff --git a/.gitignore b/.gitignore index b450b3b751d1f738042168a937b2734d73530a3a..e3905bbf8fe303a113b5a127efc16763dec3639b 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,5 @@ pytential/_git_rev.py *.so pytential/qbx/target_specific/impl.c +.ccls-cache/ +mem-use.log diff --git a/examples/boundary-layer.py b/examples/boundary-layer.py new file mode 100644 index 0000000000000000000000000000000000000000..05bedaea09543b9c4568c7d0c2bb8b0761f31b29 --- /dev/null +++ b/examples/boundary-layer.py @@ -0,0 +1,330 @@ +# Cases: +# - "line": use line Taylor expansion to compute on-surface. Using LineTaylor +# for local expansion (must force direct evaluation because cannot translate +# from VolumeTaylor to LineTaylor) +# +# - "volume": use volume Taylor expansion to compute anywhere. FMM is used. +# +# - "volume-direct": like the volume case, but with direct evaluation. +# + +enable_mayavi = 0 +if enable_mayavi: + from mayavi import mlab # noqa + +import numpy as np +import pyopencl as cl +from sumpy.expansion import ExpansionFactoryBase, VolumeTaylorExpansionFactory +from sumpy.kernel import ( + LaplaceKernel, HelmholtzKernel, AsymptoticallyInformedKernel) + +from pytential import bind, sym + +from meshmode.mesh.generation import starfish, ellipse, drop # noqa +from meshmode.mesh.refinement import RefinerWithoutAdjacency +from meshmode.discretization.connection import ( + make_refinement_connection, ChainedDiscretizationConnection) + +target_order = 16 +qbx_order = 5 +coarse_nelements = 20 +mode_nr = 23 # frequency of the source density as a cos mode + +k = 100 + 100j + + +class VolumeMultipoleLineLocalTaylorExpansionFactory(ExpansionFactoryBase): + """Volume Taylor for multipole, line Taylor for local, to be used only with + direct evaluation. Used by ``boundary-layer.py`` + """ + def get_local_expansion_class(self, base_kernel): + from sumpy.expansion.local import LineTaylorLocalExpansion + return LineTaylorLocalExpansion + + def get_multipole_expansion_class(self, base_kernel): + from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansion + return VolumeTaylorMultipoleExpansion + + +def main(case="line", curve_fn=starfish, visualize=True): + import logging + logging.basicConfig(level=logging.WARN) # INFO for more progress info + + if curve_fn is not None: + dim = 2 + else: + dim = 3 + + # case for expansion type, subcase for operator type + if isinstance(case, tuple): + case, subcase = case + else: + subcase = "S" + + print(f"Running case {case}:{subcase}, dim={dim}") + + if case == "line": + fmm_order = False # cannot use FMM here + if dim == 2: + n_refine_cycles = 5 + expn_factory = VolumeMultipoleLineLocalTaylorExpansionFactory() + elif case[:6] == "volume": + if case[6:] == "-direct": + fmm_order = False + else: + fmm_order = qbx_order + expn_factory = VolumeTaylorExpansionFactory() + + if fmm_order: + if dim == 2: + n_refine_cycles = 7 + else: + if dim == 2: + n_refine_cycles = 5 + else: + raise ValueError + + cl_ctx = cl.create_some_context() + queue = cl.CommandQueue(cl_ctx) + + if dim == 2: + from meshmode.mesh.generation import make_curve_mesh + base_mesh = make_curve_mesh( + curve_fn, + np.linspace(0, 1, coarse_nelements+1), + target_order) + elif dim == 3: + pass + refiner = RefinerWithoutAdjacency(base_mesh) + + if case[:6] == "volume": + from pytential.target import PointsTarget + if 1: + # in the volume + from sumpy.visualization import FieldPlotter + fplot = FieldPlotter(np.zeros(dim), extent=2, npoints=100) + targets_dev = cl.array.to_device(queue, fplot.points) + else: + # near surface + assert dim == 2 + target_mesh = make_curve_mesh(curve_fn, np.linspace(0, 1, 83), 1) + targets_dev = cl.array.to_device( + queue, np.ascontiguousarray(target_mesh.vertices)) + + if k: + if k.imag: + kernel = HelmholtzKernel(dim, allow_evanescent=True) + else: + kernel = HelmholtzKernel(dim, allow_evanescent=False) + kernel_kwargs = {"k": sym.var("k")} + else: + kernel = LaplaceKernel(dim) + kernel_kwargs = {} + + from pytential.qbx import QBXLayerPotentialSource + from meshmode.array_context import PyOpenCLArrayContext + from meshmode.discretization import Discretization + from meshmode.discretization.poly_element import \ + InterpolatoryQuadratureSimplexGroupFactory + from meshmode.dof_array import flatten + + actx = PyOpenCLArrayContext(queue) + base_density_discr = Discretization( + actx, base_mesh, + InterpolatoryQuadratureSimplexGroupFactory(target_order)) + previous_discr = base_density_discr + + import sympy as sp + dist = sp.Symbol("dist") + k_sym = sp.Symbol("k") + expr = sp.exp(1j * k_sym * dist) + asymp_kernel = AsymptoticallyInformedKernel(kernel, expr) + + def op(**kwargs): + kwargs.update(kernel_kwargs) + if subcase == "S": + return sym.S(kernel, sym.var("sigma"), **kwargs) + elif subcase == "D": + return sym.D(kernel, sym.var("sigma"), **kwargs) + elif subcase == "dS": + return sym.d_dx( + ambient_dim=dim, operand=sym.S(kernel, sym.var("sigma"), **kwargs)) + elif subcase == "dD": + return sym.d_dx( + ambient_dim=dim, operand=sym.D(kernel, sym.var("sigma"), **kwargs)) + else: + raise ValueError(f"unrecognized operator type: {subcase}") + + def op_asymp(**kwargs): + kwargs.update(kernel_kwargs) + if subcase == "S": + return sym.S(asymp_kernel, sym.var("sigma"), **kwargs) + elif subcase == "D": + return sym.D(asymp_kernel, sym.var("sigma"), **kwargs) + elif subcase == "dS": + return sym.d_dx( + ambient_dim=dim, operand=sym.S( + asymp_kernel, sym.var("sigma"), **kwargs)) + elif subcase == "dD": + return sym.d_dx( + ambient_dim=dim, operand=sym.D( + asymp_kernel, sym.var("sigma"), **kwargs)) + else: + raise ValueError(f"unrecognized operator type: {subcase}") + + conns = [] + ns = [] + flds = [] + flds_ref = [] + + for cycle in range(n_refine_cycles): + print(f"\nRefinement cycle {cycle+1}/{n_refine_cycles}") + refiner.refine_uniformly() + mesh = refiner.get_current_mesh() + + pre_density_discr = Discretization( + actx, mesh, + InterpolatoryQuadratureSimplexGroupFactory(target_order)) + conns.append( + make_refinement_connection( + actx, refiner, previous_discr, + InterpolatoryQuadratureSimplexGroupFactory(target_order))) + ns.append(pre_density_discr.ndofs) + + # by default, qbmax will be used whenever possible + # (have asymptotic information for the out kernels) + qbx = QBXLayerPotentialSource( + pre_density_discr, 4*target_order, qbx_order, + fmm_order=fmm_order, + target_association_tolerance=0.005, + expansion_factory=expn_factory) + + # to disable qbmax explicitly + qbx_ref = qbx.copy(_use_qbmax=False) + + from pytential import GeometryCollection + if case == "line": + places = GeometryCollection({"qbx": qbx}, auto_where="qbx") + places_ref = GeometryCollection({"qbx": qbx_ref}, auto_where="qbx") + elif case[:6] == "volume": + places = GeometryCollection({ + "qbx": qbx, "targets": PointsTarget(targets_dev), + }, auto_where="qbx") + places_ref = GeometryCollection({ + "qbx": qbx_ref, "targets": PointsTarget(targets_dev), + }, auto_where="qbx") + else: + raise ValueError + + density_discr = places.get_discretization("qbx") + assert density_discr is pre_density_discr + previous_discr = density_discr + + from meshmode.dof_array import thaw + nodes = thaw(actx, density_discr.nodes()) + angle = actx.np.arctan2(nodes[1], nodes[0]) + sigma = actx.np.cos(mode_nr*angle) + if dim == 3: + angle2 = actx.np.arctan2(nodes[2], nodes[0]) + sigma *= actx.np.cos(mode_nr*angle2) + + if isinstance(kernel, HelmholtzKernel): + k_val = np.complex128(k) + else: + k_val = np.float64(k) + + if case == "line": + if 1: + fld_ref = bind( + places_ref, op(source="qbx", qbx_forced_limit=-1))( + actx, sigma=sigma, k=k_val) + fld = bind( + places, op_asymp(source="qbx", qbx_forced_limit=-1))( + actx, sigma=sigma, k=k_val) + + # DoFArray + flds.append(fld) + flds_ref.append(fld_ref) + + elif case[:6] == "volume": + if 1: + fld_ref = bind(places_ref, op( + source="qbx", target="targets", qbx_forced_limit=None))( + actx, sigma=sigma, k=k_val) + fld = bind(places, op_asymp( + source="qbx", target="targets", qbx_forced_limit=None))( + actx, sigma=sigma, k=k_val) + + # pyopencl Array + flds.append(fld.get()) + flds_ref.append(fld_ref.get()) + + else: + raise ValueError + + # post-processing + if case == "line": + # everything onto the finest mesh + fs = [] + fs_ref = [] + for iexp in range(len(flds)): + conn = conns[iexp + 1:] + if conn: + total_conn = ChainedDiscretizationConnection(conn) + fs.append(actx.to_numpy(flatten( + total_conn(flds[iexp])))) + fs_ref.append(actx.to_numpy(flatten( + total_conn(flds_ref[iexp])))) + else: + fs.append(actx.to_numpy(flatten( + flds[iexp]))) + fs_ref.append(actx.to_numpy(flatten( + flds_ref[iexp]))) + elif case[:6] == "volume": + fs = flds + fs_ref = flds_ref + else: + raise ValueError + + from functools import partial + inorm = partial(np.linalg.norm, ord=np.inf) + last_diff = inorm(fs_ref[-1] - fs[-1]) / inorm(fs[-1]) + print(f"2*pi/|k| = {2*np.pi / abs(k)}") + print(f"Last diff = {last_diff}") + + # total length of the boundary curve + arclen = bind(places, sym.integral(dim, dim-1, operand=1, dofdesc="qbx"))(actx) + + from pytools.convergence import EOCRecorder + eocrec = EOCRecorder() + eocrec_ref = EOCRecorder() + eocrec_crossref = EOCRecorder() + for iexp in range(len(fs) - 1): + eocrec.add_data_point( + arclen/ns[iexp], + inorm(fs[iexp] - fs[-1]) / inorm(fs[-1])) + eocrec_ref.add_data_point( + arclen/ns[iexp], + inorm(fs_ref[iexp] - fs_ref[-1]) / inorm(fs_ref[-1])) + eocrec_crossref.add_data_point( + arclen/ns[iexp], + inorm(fs[iexp] - fs_ref[-1]) / inorm(fs_ref[-1])) + + print("\nQBX (without scaling):") + print(eocrec_ref) + + print("\nQBMAX against QBX:") + print(eocrec_crossref) + + print("\nQBMAX (with scaling):") + print(eocrec) + + +if __name__ == "__main__": + import sys + if len(sys.argv) == 2: + main(sys.argv[1]) + elif len(sys.argv) == 3: + main((sys.argv[1], sys.argv[2])) + else: + main() diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index a970ccd5c8ff88984f5c780279cc8bd07984a14b..c3c50c354c72b46d940469142c3b4f383a5d78d9 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -83,6 +83,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): _from_sep_smaller_min_nsources_cumul=None, _tree_kind="adaptive", _use_target_specific_qbx=None, + _use_qbmax=None, geometry_data_inspector=None, cost_model=None, fmm_backend="sumpy", @@ -107,6 +108,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): :arg cost_model: Either *None* or an object implementing the :class:`~pytential.qbx.cost.AbstractQBXCostModel` interface, used for gathering modeled costs if provided (experimental) + :arg _use_qbmax: Whether to use QBMAX. *None* means "use if possible" """ # {{{ argument processing @@ -206,6 +208,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): _from_sep_smaller_min_nsources_cumul self._tree_kind = _tree_kind self._use_target_specific_qbx = _use_target_specific_qbx + self._use_qbmax = _use_qbmax self.geometry_data_inspector = geometry_data_inspector if cost_model is None: @@ -235,6 +238,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): _from_sep_smaller_crit=None, _tree_kind=None, _use_target_specific_qbx=_not_provided, + _use_qbmax=_not_provided, geometry_data_inspector=None, cost_model=_not_provided, fmm_backend=None, @@ -321,6 +325,10 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): _use_target_specific_qbx=(_use_target_specific_qbx if _use_target_specific_qbx is not _not_provided else self._use_target_specific_qbx), + _use_qbmax=( + self._use_qbmax + if _use_qbmax is _not_provided + else _use_qbmax), geometry_data_inspector=( geometry_data_inspector or self.geometry_data_inspector), cost_model=( @@ -540,12 +548,32 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): if self.fmm_backend == "sumpy": from pytential.qbx.fmm import \ QBXSumpyExpansionWranglerCodeContainer + + if self._use_qbmax is None or self._use_qbmax: + from sumpy.kernel import (AsymptoticallyInformedKernel, + ExpaniosnClassReplacer) + ecr = ExpaniosnClassReplacer(local_expn_class) + source_kernels = tuple(ecr(sk) for sk in source_kernels) + target_kernels = tuple(ecr(tk) for tk in target_kernels) + if any(isinstance(tk, AsymptoticallyInformedKernel) + for tk in target_kernels) or any( + isinstance(sk, AsymptoticallyInformedKernel) + for sk in source_kernels): + qbmax_local_factory = partial(local_expn_class, base_kernel) + else: + qbmax_local_factory = None + else: + qbmax_local_factory = None + return QBXSumpyExpansionWranglerCodeContainer( self.cl_context, fmm_mpole_factory, fmm_local_factory, qbx_local_factory, - target_kernels=target_kernels, source_kernels=source_kernels) + target_kernels=target_kernels, source_kernels=source_kernels, + _qbmax_local_expansion_factory=qbmax_local_factory) elif self.fmm_backend == "fmmlib": + if self._use_qbmax: + raise ValueError("QBMAX is not implemented in fmmlib") source_kernel, = source_kernels target_kernels_new = [ target_kernel.replace_base_kernel(source_kernel) for @@ -711,13 +739,20 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): value_dtype = self.density_discr.real_dtype base_kernel = single_valued(knl.get_base_kernel() for knl in source_kernels) + expansion = self.get_expansion_for_qbx_direct_eval( + base_kernel, target_kernels) + + if self._use_qbmax is None or self._use_qbmax: + from sumpy.kernel import ExpaniosnClassReplacer + ecr = ExpaniosnClassReplacer(type(expansion)) + source_kernels = tuple(ecr(sk) for sk in source_kernels) + target_kernels = tuple(ecr(tk) for tk in target_kernels) from sumpy.qbx import LayerPotential return LayerPotential(self.cl_context, - expansion=self.get_expansion_for_qbx_direct_eval( - base_kernel, target_kernels), + expansion=expansion, target_kernels=target_kernels, source_kernels=source_kernels, - value_dtypes=value_dtype) + value_dtypes=value_dtype, _use_qbmax=self._use_qbmax) @memoize_method def get_lpot_applier_on_tgt_subset(self, target_kernels, source_kernels): @@ -730,13 +765,18 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): base_kernel = single_valued(knl.get_base_kernel() for knl in source_kernels) - from pytential.qbx.direct import LayerPotentialOnTargetAndCenterSubset from sumpy.expansion.local import VolumeTaylorLocalExpansion + from sumpy.kernel import ExpaniosnClassReplacer + ecr = ExpaniosnClassReplacer(VolumeTaylorLocalExpansion) + source_kernels = tuple(ecr(knl) for knl in source_kernels) + target_kernels = tuple(ecr(knl) for knl in target_kernels) + + from pytential.qbx.direct import LayerPotentialOnTargetAndCenterSubset return LayerPotentialOnTargetAndCenterSubset( self.cl_context, expansion=VolumeTaylorLocalExpansion(base_kernel, self.qbx_order), target_kernels=target_kernels, source_kernels=source_kernels, - value_dtypes=value_dtype) + value_dtypes=value_dtype, _use_qbmax=self._use_qbmax) @memoize_method def get_qbx_target_numberer(self, dtype): @@ -924,6 +964,18 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): for i, res_i in enumerate(output_for_each_kernel): tgt_subset_kwargs[f"result_{i}"] = res_i + from sumpy.kernel import (AsymptoticallyInformedKernel, + TargetDerivativeRemover) + tdr = TargetDerivativeRemover() + if (self._use_qbmax is None or self._use_qbmax) and any([ + isinstance(tdr(knl), AsymptoticallyInformedKernel) + for knl in insn.target_kernels]): + qbmax_extra_kwargs = { + "nodes_of_qbx_balls_tangency": geo_data.flat_stage1_nodes() + } + else: + qbmax_extra_kwargs = {} + if qbx_tgt_count: lpot_applier_on_tgt_subset( queue, @@ -934,7 +986,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): strengths=flat_strengths, qbx_tgt_numbers=qbx_tgt_numbers, qbx_center_numbers=qbx_center_numbers, - **tgt_subset_kwargs) + **tgt_subset_kwargs, **qbmax_extra_kwargs) for i, o in outputs: result = output_for_each_kernel[o.target_kernel_index] diff --git a/pytential/qbx/direct.py b/pytential/qbx/direct.py index bbb0f7aa18d3ab3f7c8ba5289298c00dd362afcf..f60b6979a2809f8cb3bc8b8e4b3e5d213db15162 100644 --- a/pytential/qbx/direct.py +++ b/pytential/qbx/direct.py @@ -37,9 +37,6 @@ class LayerPotentialOnTargetAndCenterSubset(LayerPotentialBase): return super().get_cache_key() + (PYTENTIAL_KERNEL_VERSION,) def get_kernel(self): - loopy_insns, result_names = self.get_loopy_insns_and_result_names() - kernel_exprs = self.get_kernel_exprs(result_names) - from sumpy.tools import gather_loopy_source_arguments arguments = ( gather_loopy_source_arguments((self.expansion,) @@ -68,6 +65,64 @@ class LayerPotentialOnTargetAndCenterSubset(LayerPotentialBase): shape="ntargets_total", order="C") for i in range(len(self.target_kernels))]) + # scaling at target for qbmax + sats = [] + sat_insns_data = [] + for iknl, knl in enumerate(self.target_kernels): + from sumpy.kernel import TargetDerivativeRemover + tdr = TargetDerivativeRemover() + try: + raw_knl = tdr(knl) + sat_expr = raw_knl.get_scaling_expression(None) + except AttributeError: + sat_expr = None + + if sat_expr: + # inject the multiplicative scales of each output kernel + sats.append(f"sat_{iknl} * ") + sat_insns_data.append((f"sat_{iknl}", sat_expr)) + else: + sats.append("") + + if sat_insns_data: + # inject auxiliary insns for qbmax expansion + if 0: + # just use bvec (as if boundary layer around the disk) + qbmax_local_temp_insns = [""" + <> normal[idim] = tgt[idim, itgt] - \ + center[idim, icenter] {dup=idim} + """] + else: + qbmax_local_temp_insns = [""" + <> normal[idim] = \ + nodes_of_qbx_balls_tangency[idim, icenter//2] - \ + center[idim, icenter] {dup=idim} + """] + + arguments += ( + lp.GlobalArg("nodes_of_qbx_balls_tangency", None, + shape=(self.dim, "ncenters_total/2"), dim_tags="sep,C"), + ) + + # treat the injected "normal" as a vector during codegen + # must be before calling self.get_loopy_insns_and_result_names() + self._vector_names.add("normal") + + from sumpy.codegen import to_loopy_insns + sat_insns = to_loopy_insns( + sat_insns_data, + vector_names=self._vector_names, + retain_names=[datpair[0] for datpair in sat_insns_data], + complex_dtype=np.complex128) # FIXME + + else: + # no qbmax + qbmax_local_temp_insns = [] + sat_insns = [] + + loopy_insns, result_names = self.get_loopy_insns_and_result_names() + kernel_exprs = self.get_kernel_exprs(result_names) + loopy_knl = lp.make_kernel([ "{[itgt_local]: 0 <= itgt_local < ntargets}", "{[isrc]: 0 <= isrc < nsources}", @@ -86,12 +141,13 @@ class LayerPotentialOnTargetAndCenterSubset(LayerPotentialBase): """] + [f"<> strength_{i}_isrc = strength_{i}[isrc]" for i in range(self.strength_count)] + + qbmax_local_temp_insns + sat_insns + loopy_insns + kernel_exprs + [""" - result_{i}[itgt] = knl_{i}_scaling * \ + result_{i}[itgt] = knl_{i}_scaling * {sat} \ simul_reduce(sum, isrc, pair_result_{i}) \ {{inames=itgt_local}} - """.format(i=iknl) + """.format(i=iknl, sat=sats[iknl]) for iknl in range(len(self.target_kernels))] + ["end"], arguments, diff --git a/pytential/qbx/fmm.py b/pytential/qbx/fmm.py index 787c44e566b0b22847039929282b5aa99827a176..a6c83b60b4f34a41e359c9a4d4701eba21d86b82 100644 --- a/pytential/qbx/fmm.py +++ b/pytential/qbx/fmm.py @@ -28,7 +28,8 @@ from sumpy.fmm import (SumpyExpansionWranglerCodeContainer, SumpyExpansionWrangler, level_to_rscale, SumpyTimingFuture) from pytools import memoize_method -from pytential.qbx.interactions import P2QBXLFromCSR, M2QBXL, L2QBXL, QBXL2P +from pytential.qbx.interactions import (P2QBXLFromCSR, P2QBMAXLFromCSR, + M2QBXL, L2QBXL, QBXL2P, QBMAXL2P) from boxtree.fmm import TimingRecorder from pytools import log_process, ProcessLogger @@ -51,22 +52,33 @@ __doc__ = """ class QBXSumpyExpansionWranglerCodeContainer(SumpyExpansionWranglerCodeContainer): def __init__(self, cl_context, multipole_expansion_factory, local_expansion_factory, - qbx_local_expansion_factory, target_kernels, source_kernels): + qbx_local_expansion_factory, target_kernels, source_kernels, + _qbmax_local_expansion_factory=None): SumpyExpansionWranglerCodeContainer.__init__(self, cl_context, multipole_expansion_factory, local_expansion_factory, target_kernels=target_kernels, source_kernels=source_kernels) self.qbx_local_expansion_factory = qbx_local_expansion_factory + self.qbmax_local_expansion_factory = _qbmax_local_expansion_factory @memoize_method def qbx_local_expansion(self, order): return self.qbx_local_expansion_factory(order, self.use_rscale) + @memoize_method + def qbmax_local_expansion(self, order): + return self.qbmax_local_expansion_factory(order, self.use_rscale) + @memoize_method def p2qbxl(self, order): return P2QBXLFromCSR(self.cl_context, self.qbx_local_expansion(order), kernels=self.source_kernels) + @memoize_method + def p2qbmaxl(self, order): + return P2QBMAXLFromCSR(self.cl_context, + self.qbmax_local_expansion(order), kernels=self.source_kernels) + @memoize_method def m2qbxl(self, source_order, target_order): return M2QBXL(self.cl_context, @@ -85,6 +97,12 @@ class QBXSumpyExpansionWranglerCodeContainer(SumpyExpansionWranglerCodeContainer self.qbx_local_expansion_factory(order), self.target_kernels) + @memoize_method + def qbmaxl2p(self, order): + return QBMAXL2P(self.cl_context, + self.qbmax_local_expansion_factory(order), + self.target_kernels) + def get_wrangler(self, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs=None, @@ -134,7 +152,10 @@ non_qbx_box_target_lists`), self.qbx_order = qbx_order self.geo_data = geo_data - self.using_tsqbx = False + + @property + def using_qbmax(self): + return self.code.qbmax_local_expansion_factory is not None # {{{ data vector utilities @@ -208,6 +229,9 @@ non_qbx_box_target_lists`), local_exps = self.qbx_local_expansion_zeros() events = [] + if self.using_qbmax: + return (local_exps, SumpyTimingFuture(self.queue, events)) + geo_data = self.geo_data if len(geo_data.global_qbx_centers()) == 0: return (local_exps, SumpyTimingFuture(self.queue, events)) @@ -242,6 +266,49 @@ non_qbx_box_target_lists`), return (result, SumpyTimingFuture(self.queue, events)) + @log_process(logger) + def form_qbmax_locals(self, src_weight_vecs): + ta_local_exps = self.qbx_local_expansion_zeros() + events = [] + + if not self.using_qbmax: + return (ta_local_exps, SumpyTimingFuture(self.queue, events)) + + geo_data = self.geo_data + if len(geo_data.global_qbx_centers()) == 0: + return (ta_local_exps, SumpyTimingFuture(self.queue, events)) + + traversal = geo_data.traversal() + + starts = traversal.neighbor_source_boxes_starts + lists = traversal.neighbor_source_boxes_lists + + kwargs = self.extra_kwargs.copy() + kwargs.update(self.box_source_list_kwargs()) + + p2qbmaxl = self.code.p2qbmaxl(self.qbx_order) + + evt, (result,) = p2qbmaxl( + self.queue, + global_qbx_centers=geo_data.global_qbx_centers(), + qbx_center_to_target_box=geo_data.qbx_center_to_target_box(), + qbx_centers=geo_data.flat_centers(), + nodes_of_qbx_balls_tangency=geo_data.flat_stage1_nodes(), + qbx_expansion_radii=geo_data.flat_expansion_radii(), + + source_box_starts=starts, + source_box_lists=lists, + strengths=src_weight_vecs, + qbmax_expansions=ta_local_exps, + + **kwargs) + + events.append(evt) + assert ta_local_exps is result + result.add_event(evt) + + return (result, SumpyTimingFuture(self.queue, events)) + @log_process(logger) def translate_box_multipoles_to_qbx_local(self, multipole_exps): qbx_expansions = self.qbx_local_expansion_zeros() @@ -381,6 +448,46 @@ non_qbx_box_target_lists`), def eval_target_specific_qbx_locals(self, src_weight_vecs): return (self.full_output_zeros(), SumpyTimingFuture(self.queue, events=())) + @log_process(logger) + def eval_qbmax_locals(self, qbmax_expansions): + pot = self.full_output_zeros() + + geo_data = self.geo_data + events = [] + + if not self.using_qbmax: + return (pot, SumpyTimingFuture(self.queue, events)) + + if len(geo_data.global_qbx_centers()) == 0: + return (pot, SumpyTimingFuture(self.queue, events)) + + ctt = geo_data.center_to_tree_targets() + + qbmaxl2p = self.code.qbmaxl2p(self.qbx_order) + + evt, pot_res = qbmaxl2p( + self.queue, + qbx_centers=geo_data.flat_centers(), + nodes_of_qbx_balls_tangency=geo_data.flat_stage1_nodes(), + qbx_expansion_radii=geo_data.flat_expansion_radii(), + + global_qbx_centers=geo_data.global_qbx_centers(), + + center_to_targets_starts=ctt.starts, + center_to_targets_lists=ctt.lists, + + targets=self.tree.targets, + + qbmax_expansions=qbmax_expansions, + result=pot, + + **self.kernel_extra_kwargs.copy()) + + for pot_i, pot_res_i in zip(pot, pot_res): + assert pot_i is pot_res_i + + return (pot, SumpyTimingFuture(self.queue, events)) + # }}} # }}} @@ -537,6 +644,7 @@ def drive_fmm(expansion_wrangler, src_weight_vecs, timing_data=None, # for the same interactions (directly evaluated portion of the potentials # via unified List 1). Which one is used depends on the wrangler. If one of # them is unused the corresponding output entries will be zero. + # (QBMAX uses neither) qbx_expansions, timing_future = wrangler.form_global_qbx_locals(src_weight_vecs) @@ -567,6 +675,18 @@ def drive_fmm(expansion_wrangler, src_weight_vecs, timing_data=None, recorder.add("eval_target_specific_qbx_locals", timing_future) + qbmax_expansions, timing_future = \ + wrangler.form_qbmax_locals(src_weight_vecs) + + # recorder.add("form_qbmax_locals", timing_future) + + ta_result, timing_future = \ + wrangler.eval_qbmax_locals(qbmax_expansions) + + qbx_potentials = qbx_potentials + ta_result + + # recorder.add("eval_qbmax_locals", timing_future) + # }}} # {{{ reorder potentials diff --git a/pytential/qbx/fmmlib.py b/pytential/qbx/fmmlib.py index d24a50990855e8b4cc4fb6be42008bbbd108006f..8c64b5cc575caca8b55864c859fb1318d5dc5f70 100644 --- a/pytential/qbx/fmmlib.py +++ b/pytential/qbx/fmmlib.py @@ -453,6 +453,16 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): # }}} + @log_process(logger) + @return_timing_data + def form_qbmax_locals(self, src_weight_vecs): + return self.qbx_local_expansion_zeros() + + @log_process(logger) + @return_timing_data + def eval_qbmax_locals(self, qbmax_expansions): + return self.full_output_zeros() + @log_process(logger) @return_timing_data def translate_box_local_to_qbx_local(self, local_exps): diff --git a/pytential/qbx/geometry.py b/pytential/qbx/geometry.py index f3623edcb7ebc9fe5da2da3d9f171ecfc583db4f..4353935e5b5e2bbffb39c60e4b386ea2434ccdc8 100644 --- a/pytential/qbx/geometry.py +++ b/pytential/qbx/geometry.py @@ -456,6 +456,23 @@ class QBXFMMGeometryData(FMMLibRotationDataInterface): return self.array_context.freeze(flatten(radii)) + @memoize_method + def flat_stage1_nodes(self): + """Stage 1 nodes associated with centers during QBX construction. They + are the points of tangency between QBX expansion balls and the source + geometry. + + centers = nodes + sides * radii * normals + """ + from pytential import bind, sym + + nodes = bind(self.places, sym.nodes( + self.ambient_dim, + dofdesc=self.source_dd.to_stage1()).as_vector())( + self.array_context) + + return obj_array_vectorize(self.array_context.freeze, flatten(nodes)) + # }}} # {{{ target info diff --git a/pytential/qbx/interactions.py b/pytential/qbx/interactions.py index f08366a03cb1ea7e2f8f236f9a28d9e30164ee90..afedc65d65ffbba9151d075027193c86bc9cf0c4 100644 --- a/pytential/qbx/interactions.py +++ b/pytential/qbx/interactions.py @@ -135,6 +135,116 @@ class P2QBXLFromCSR(P2EBase): # }}} +# {{{ form qbmax expansions from points + +class P2QBMAXLFromCSR(P2EBase): + default_name = "p2qbmaxl_from_csr" + + def get_cache_key(self): + return super().get_cache_key() + (PYTENTIAL_KERNEL_VERSION,) + + def get_kernel(self): + ncoeffs = len(self.expansion) + + # symbolic vector injected by qbmax + self._vector_names.add("normal") + + from sumpy.tools import gather_loopy_source_arguments + arguments = ( + [ + lp.GlobalArg("sources", None, shape=(self.dim, "nsources"), + dim_tags="sep,c"), + lp.GlobalArg("strengths", None, dim_tags="sep,c", + shape="strength_count, nsources"), + lp.GlobalArg("qbx_center_to_target_box", + None, shape=None), + lp.GlobalArg("source_box_starts,source_box_lists", + None, shape=None), + lp.GlobalArg("box_source_starts,box_source_counts_nonchild", + None, shape=None), + lp.GlobalArg("qbx_centers", None, shape="dim, ncenters", + dim_tags="sep,c"), + lp.GlobalArg("nodes_of_qbx_balls_tangency", None, + shape=(self.dim, "ncenters/2"), dim_tags="sep,C"), + lp.GlobalArg("qbx_expansion_radii", None, shape="ncenters"), + lp.GlobalArg("qbmax_expansions", None, + shape=("ncenters", ncoeffs)), + lp.ValueArg("ncenters", np.int32), + lp.ValueArg("nsources", np.int32), + "..." + ] + gather_loopy_source_arguments( + self.source_kernels + (self.expansion,)) + ) + + loopy_knl = lp.make_kernel( + [ + "{[itgt_center]: 0<=itgt_center tgt_icenter = global_qbx_centers[itgt_center] + + <> center[idim] = qbx_centers[idim, tgt_icenter] {dup=idim} + <> rscale = qbx_expansion_radii[tgt_icenter] + <> normal[idim] = \ + nodes_of_qbx_balls_tangency[idim, tgt_icenter//2] - \ + qbx_centers[idim, tgt_icenter] {dup=idim} + + <> itgt_box = qbx_center_to_target_box[tgt_icenter] + + <> isrc_box_start = source_box_starts[itgt_box] + <> isrc_box_stop = source_box_starts[itgt_box+1] + + for isrc_box + <> src_ibox = source_box_lists[isrc_box] + <> isrc_start = box_source_starts[src_ibox] + <> isrc_end = isrc_start+box_source_counts_nonchild[src_ibox] + + for isrc + <> a[idim] = center[idim] - sources[idim, isrc] \ + {dup=idim} + """] + [f"<> strength_{i} = strengths[{i}, isrc]" for + i in set(self.strength_usage)] + + self.get_loopy_instructions(isotropic_expansion=False) + [""" + end + end + + """] + [f""" + qbmax_expansions[tgt_icenter, {i}] = \ + simul_reduce(sum, (isrc_box, isrc), coeff{i}) \ + {{id_prefix=write_expn}} + """ for i in range(ncoeffs)] + [""" + + end + """], + arguments, + name=self.name, assumptions="ntgt_centers>=1", + silenced_warnings="write_race(write_expn*)", + fixed_parameters=dict(dim=self.dim, + strength_count=self.strength_count), + lang_version=MOST_RECENT_LANGUAGE_VERSION) + + for knl in self.source_kernels: + loopy_knl = knl.prepare_loopy_kernel(loopy_knl) + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") + + return loopy_knl + + @memoize_method + def get_optimized_kernel(self): + knl = self.get_kernel() + knl = lp.split_iname(knl, "itgt_center", 16, outer_tag="g.0") + return knl + + def __call__(self, queue, **kwargs): + return self.get_cached_optimized_kernel()(queue, **kwargs) + +# }}} + + # {{{ translation from (likely, list 3) multipoles to qbx expansions class M2QBXL(E2EBase): @@ -440,4 +550,115 @@ class QBXL2P(E2PBase): # }}} + +# {{{ evaluation of qbmax expansions + +class QBMAXL2P(E2PBase): + default_name = "qbmax_potential_from_local" + + def get_cache_key(self): + return super().get_cache_key() + (PYTENTIAL_KERNEL_VERSION,) + + def get_kernel(self): + ncoeffs = len(self.expansion) + + self._vector_names.add("normal") + loopy_insns, result_names = self.get_loopy_insns_and_result_names( + use_qbmax=True) + + from sumpy.kernel import AsymptoticScalingGetter + asg = AsymptoticScalingGetter() + sat_exprs = [asg(knl) for knl in self.kernels] + sats = [f"sat{i} * " for i in range(len(result_names))] + sat_names = [f"sat{i}" for i in range(len(result_names))] + + from sumpy.codegen import to_loopy_insns + sat_insns = to_loopy_insns( + [(sn, se) for sn, se in zip(sat_names, sat_exprs)], + vector_names=self._vector_names, + pymbolic_expr_maps=[knl.get_code_transformer() + for knl in self.kernels], + retain_names=sat_names, + complex_dtype=np.complex128) + + loopy_knl = lp.make_kernel( + [ + "{[iglobal_center]: 0<=iglobal_center src_icenter = global_qbx_centers[iglobal_center] + + <> icenter_tgt_start = center_to_targets_starts[src_icenter] + <> icenter_tgt_end = center_to_targets_starts[src_icenter+1] + + <> center[idim] = qbx_centers[idim, src_icenter] {dup=idim} + <> rscale = qbx_expansion_radii[src_icenter] + <> normal[idim] = \ + nodes_of_qbx_balls_tangency[idim, src_icenter//2] - \ + qbx_centers[idim, src_icenter] {dup=idim} + + for icenter_tgt + + <> center_itgt = center_to_targets_lists[icenter_tgt] + + <> b[idim] = targets[idim, center_itgt] - center[idim] + + """] + [""" + <> coeff{i} = qbmax_expansions[src_icenter, {i}] + """.format(i=i) for i in range(ncoeffs)] + [ + + ] + loopy_insns + [""" + + result[{i},center_itgt] = {sat} kernel_scaling \ + * result_{i}_p {{id_prefix=write_result}} + """.format(i=i, sat=sats[i]) + for i in range(len(result_names))] + [""" + end + end + """], + [ + lp.GlobalArg("result", None, shape="nresults, ntargets", + dim_tags="sep,C"), + lp.GlobalArg("qbx_centers", None, shape="dim, ncenters", + dim_tags="sep,c"), + lp.GlobalArg("nodes_of_qbx_balls_tangency", None, + shape=(self.dim, "ncenters/2"), dim_tags="sep,C"), + lp.GlobalArg("center_to_targets_starts,center_to_targets_lists", + None, shape=None), + lp.GlobalArg("qbmax_expansions", None, + shape=("ncenters", ncoeffs)), + lp.GlobalArg("targets", None, shape=(self.dim, "ntargets"), + dim_tags="sep,C"), + lp.ValueArg("ncenters,ntargets", np.int32), + "..." + ] + [arg.loopy_arg for arg in self.expansion.get_args()], + name=self.name, + assumptions="nglobal_qbx_centers>=1", + silenced_warnings="write_race(write_result*)", + fixed_parameters=dict(dim=self.dim, nresults=len(result_names)), + lang_version=MOST_RECENT_LANGUAGE_VERSION) + + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") + for knl in self.kernels: + loopy_knl = knl.prepare_loopy_kernel(loopy_knl) + + return loopy_knl + + @memoize_method + def get_optimized_kernel(self): + knl = self.get_kernel() + knl = lp.tag_inames(knl, dict(iglobal_center="g.0")) + knl = self._allow_redundant_execution_of_knl_scaling(knl) + return knl + + def __call__(self, queue, **kwargs): + return self.get_cached_optimized_kernel()(queue, **kwargs) + +# }}} + # vim: foldmethod=marker diff --git a/requirements.txt b/requirements.txt index 4c26b26a598cbbdf72986e77b2d4b729a887d8a3..2b4105b0ab3cb662af7e232e14645b0ef171dfeb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,5 +9,6 @@ git+https://github.com/inducer/loopy.git#egg=loopy git+https://github.com/inducer/boxtree.git#egg=boxtree git+https://github.com/inducer/arraycontext.git#egg=arraycontext git+https://github.com/inducer/meshmode.git#egg=meshmode -git+https://github.com/inducer/sumpy.git#egg=sumpy +# git+https://github.com/inducer/sumpy.git#egg=sumpy +git+https://gitlab.tiker.net/inducer/sumpy.git@boundary-layer#egg=sumpy git+https://github.com/inducer/pyfmmlib.git#egg=pyfmmlib diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 1f389aeb66e2e1f57f4916c428ffe481fddc0fa3..6ba48e5243752ce391d1c2fddb9880eef520b434 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -481,7 +481,8 @@ def test_cost_model_metadata_gathering(actx_factory): class ConstantOneQBXExpansionWrangler(ConstantOneExpansionWrangler): - def __init__(self, queue, geo_data, use_target_specific_qbx): + def __init__(self, queue, geo_data, use_target_specific_qbx, + use_target_asymptotic_qbx=False): from pytential.qbx.utils import ToHostTransferredGeoDataWrapper geo_data = ToHostTransferredGeoDataWrapper(queue, geo_data) @@ -491,6 +492,7 @@ class ConstantOneQBXExpansionWrangler(ConstantOneExpansionWrangler): use_target_specific_qbx # None means use by default if possible or use_target_specific_qbx is None) + self.using_taqbx = use_target_asymptotic_qbx ConstantOneExpansionWrangler.__init__(self, geo_data.tree()) @@ -652,6 +654,24 @@ class ConstantOneQBXExpansionWrangler(ConstantOneExpansionWrangler): return pot, self.timing_future(ops) + def form_qbmax_locals(self, src_weight_vecs): + ta_local_exps = self.qbx_local_expansion_zeros() + ops = 0 + + if not self.using_taqbx: + return ta_local_exps, self.timing_future(ops) + + raise NotImplementedError + + def eval_qbmax_locals(self, taqbx_expansions): + pot = self.full_output_zeros() + ops = 0 + + if not self.using_taqbx: + return pot, self.timing_future(ops) + + raise NotImplementedError + # }}} diff --git a/test/test_layer_pot.py b/test/test_layer_pot.py index bd91b1b08f7e584f29a8427a6d570f5a69f88dee..6c07389c4c37b34b6649b3bcac8a53003eea0ae3 100644 --- a/test/test_layer_pot.py +++ b/test/test_layer_pot.py @@ -77,7 +77,8 @@ def test_geometry(actx_factory): # {{{ test off-surface eval @pytest.mark.parametrize("use_fmm", [True, False]) -def test_off_surface_eval(actx_factory, use_fmm, visualize=False): +@pytest.mark.parametrize("use_qbmax", [True, False]) +def test_off_surface_eval(actx_factory, use_fmm, use_qbmax, visualize=False): logging.basicConfig(level=logging.INFO) actx = actx_factory() @@ -120,7 +121,15 @@ def test_off_surface_eval(actx_factory, use_fmm, visualize=False): density_discr = places.get_discretization(places.auto_source.geometry) from sumpy.kernel import LaplaceKernel - op = sym.D(LaplaceKernel(2), sym.var("sigma"), qbx_forced_limit=-2) + if use_qbmax: + from sumpy.kernel import AsymptoticallyInformedKernel + import sympy as sp + # no, this scaling doesn't do anything but test the machinary + knl = AsymptoticallyInformedKernel( + LaplaceKernel(2), sp.exp(-sp.Symbol("dist"))) + else: + knl = LaplaceKernel(2) + op = sym.D(knl, sym.var("sigma"), qbx_forced_limit=-2) sigma = density_discr.zeros(actx) + 1 fld_in_vol = bind(places, op)(actx, sigma=sigma) @@ -143,7 +152,8 @@ def test_off_surface_eval(actx_factory, use_fmm, visualize=False): # {{{ test off-surface eval vs direct -def test_off_surface_eval_vs_direct(actx_factory, do_plot=False): +@pytest.mark.parametrize("use_qbmax", [True, False]) +def test_off_surface_eval_vs_direct(actx_factory, use_qbmax, do_plot=False): logging.basicConfig(level=logging.INFO) actx = actx_factory() @@ -183,6 +193,14 @@ def test_off_surface_eval_vs_direct(actx_factory, do_plot=False): from pytential.target import PointsTarget ptarget = PointsTarget(actx.freeze(actx.from_numpy(fplot.points))) from sumpy.kernel import LaplaceKernel + if use_qbmax: + from sumpy.kernel import AsymptoticallyInformedKernel + import sympy as sp + # no, this scaling doesn't do anything but test the machinary + knl = AsymptoticallyInformedKernel( + LaplaceKernel(2), sp.exp(-sp.Symbol("dist"))) + else: + knl = LaplaceKernel(2) places = GeometryCollection({ "direct_qbx": direct_qbx, @@ -194,7 +212,7 @@ def test_off_surface_eval_vs_direct(actx_factory, do_plot=False): fmm_density_discr = places.get_discretization("fmm_qbx") from pytential.qbx import QBXTargetAssociationFailedException - op = sym.D(LaplaceKernel(2), sym.var("sigma"), qbx_forced_limit=None) + op = sym.D(knl, sym.var("sigma"), qbx_forced_limit=None) try: direct_sigma = direct_density_discr.zeros(actx) + 1 direct_fld_in_vol = bind(places, op, @@ -230,7 +248,8 @@ def test_off_surface_eval_vs_direct(actx_factory, do_plot=False): # {{{ -def test_single_plus_double_with_single_fmm(actx_factory, do_plot=False): +@pytest.mark.parametrize("use_qbmax", [True, False]) +def test_single_plus_double_with_single_fmm(actx_factory, use_qbmax, do_plot=False): logging.basicConfig(level=logging.INFO) actx = actx_factory() @@ -270,6 +289,14 @@ def test_single_plus_double_with_single_fmm(actx_factory, do_plot=False): from pytential.target import PointsTarget ptarget = PointsTarget(actx.freeze(actx.from_numpy(fplot.points))) from sumpy.kernel import LaplaceKernel + if use_qbmax: + from sumpy.kernel import AsymptoticallyInformedKernel + import sympy as sp + # no, this scaling doesn't do anything but test the machinary + knl = AsymptoticallyInformedKernel( + LaplaceKernel(2), sp.exp(-sp.Symbol("dist"))) + else: + knl = LaplaceKernel(2) places = GeometryCollection({ "direct_qbx": direct_qbx, @@ -280,7 +307,6 @@ def test_single_plus_double_with_single_fmm(actx_factory, do_plot=False): direct_density_discr = places.get_discretization("direct_qbx") fmm_density_discr = places.get_discretization("fmm_qbx") - knl = LaplaceKernel(2) from pytential.qbx import QBXTargetAssociationFailedException op_d = sym.D(knl, sym.var("sigma"), qbx_forced_limit=None) op_s = sym.S(knl, sym.var("sigma"), qbx_forced_limit=None)