diff --git a/sumpy/fmm.py b/sumpy/fmm.py index 3d313f258b421a9f51e2905e66661c1d4b831df1..17b03403ceab36a59b11b742b0bf7ff82ee04e9b 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -55,16 +55,20 @@ class SumpyExpansionWranglerCodeContainer(object): def __init__(self, cl_context, multipole_expansion_factory, - local_expansion_factory, out_kernels): + local_expansion_factory, + out_kernels, exclude_self=False): """ :arg multipole_expansion_factory: a callable of a single argument (order) that returns a multipole expansion. :arg local_expansion_factory: a callable of a single argument (order) that returns a local expansion. + :arg out_kernels: a list of output kernels + :arg exclude_self: whether the self contribution should be excluded """ self.multipole_expansion_factory = multipole_expansion_factory self.local_expansion_factory = local_expansion_factory self.out_kernels = out_kernels + self.exclude_self = exclude_self self.cl_context = cl_context @@ -118,14 +122,15 @@ class SumpyExpansionWranglerCodeContainer(object): @memoize_method def p2p(self): - # FIXME figure out what to do about exclude_self - return P2PFromCSR(self.cl_context, self.out_kernels, exclude_self=False) + return P2PFromCSR(self.cl_context, self.out_kernels, + exclude_self=self.exclude_self) def get_wrangler(self, queue, tree, dtype, fmm_level_to_order, source_extra_kwargs={}, - kernel_extra_kwargs=None): + kernel_extra_kwargs=None, + self_extra_kwargs=None): return SumpyExpansionWrangler(self, queue, tree, dtype, fmm_level_to_order, - source_extra_kwargs, kernel_extra_kwargs) + source_extra_kwargs, kernel_extra_kwargs, self_extra_kwargs) # }}} @@ -155,11 +160,18 @@ class SumpyExpansionWrangler(object): Keyword arguments to be passed to interactions that involve expansions, but not source particles. + + .. attribute:: self_extra_kwargs + + Keyword arguments to be passed for handling + self interactions (source and target particles are the same), + provided special handling is needed """ - def __init__(self, code_container, queue, tree, dtype, fmm_level_to_order, + def __init__(self, code_container, queue, tree, dtype, fmm_level_to_order, source_extra_kwargs, - kernel_extra_kwargs=None): + kernel_extra_kwargs=None, + self_extra_kwargs=None): self.code = code_container self.queue = queue self.tree = tree @@ -174,8 +186,12 @@ class SumpyExpansionWrangler(object): if kernel_extra_kwargs is None: kernel_extra_kwargs = {} + if self_extra_kwargs is None: + self_extra_kwargs = {} + self.source_extra_kwargs = source_extra_kwargs self.kernel_extra_kwargs = kernel_extra_kwargs + self.self_extra_kwargs = self_extra_kwargs self.extra_kwargs = source_extra_kwargs.copy() self.extra_kwargs.update(self.kernel_extra_kwargs) @@ -373,6 +389,7 @@ class SumpyExpansionWrangler(object): pot = self.potential_zeros() kwargs = self.extra_kwargs.copy() + kwargs.update(self.self_extra_kwargs) kwargs.update(self.box_source_list_kwargs()) kwargs.update(self.box_target_list_kwargs()) diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 3071a3b88c9b1fc45c7d4ce3470535f084aba0a9..b0f15abb67f100c95dc8ab4b269441134e59e9cb 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -49,7 +49,7 @@ Particle-to-particle # {{{ p2p base class class P2PBase(KernelComputation, KernelCacheWrapper): - def __init__(self, ctx, kernels, exclude_self, strength_usage=None, + def __init__(self, ctx, kernels, exclude_self, strength_usage=None, value_dtypes=None, options=[], name=None, device=None): """ @@ -117,12 +117,8 @@ class P2P(P2PBase): for i, name in enumerate(result_names)] if self.exclude_self: - from pymbolic.primitives import If, ComparisonOperator, Variable - exprs = [ - If( - ComparisonOperator(Variable("isrc"), "!=", Variable("itgt")), - expr, 0) - for expr in exprs] + from pymbolic.primitives import If, Variable + exprs = [If(Variable("is_self"), 0, expr) for expr in exprs] from sumpy.tools import gather_loopy_source_arguments loopy_knl = lp.make_kernel( @@ -133,8 +129,10 @@ class P2P(P2PBase): for itgt for isrc """] + loopy_insns + [""" - <> d[idim] = targets[idim,itgt] - sources[idim,isrc] \ - """]+[ + <> d[idim] = targets[idim,itgt] - sources[idim,isrc] + """] + [""" + <> is_self = (source_to_target[isrc] == itgt) + """ if self.exclude_self else ""] + [ lp.Assignment(id=None, assignee="pair_result_%d" % i, expression=expr, temp_var_type=lp.auto) @@ -159,7 +157,10 @@ class P2P(P2PBase): lp.GlobalArg("strength", None, shape="nstrengths,nsources"), lp.GlobalArg("result", None, shape="nresults,ntargets", dim_tags="sep,C") - ] + gather_loopy_source_arguments(self.kernels), + ] + ( + [lp.GlobalArg("source_to_target", np.int32, shape=("nsources",))] + if self.exclude_self else [] + ) + gather_loopy_source_arguments(self.kernels), name=self.name, assumptions="nsources>=1 and ntargets>=1") @@ -209,8 +210,6 @@ class P2P(P2PBase): class P2PFromCSR(P2PBase): default_name = "p2p_from_csr" - # FIXME: exclude_self ...? - def get_kernel(self): loopy_insns, result_names = self.get_loopy_insns_and_result_names() @@ -220,6 +219,10 @@ class P2PFromCSR(P2PBase): * var("strength").index((self.strength_usage[i], var("isrc"))) for i, name in enumerate(result_names)] + if self.exclude_self: + from pymbolic.primitives import If, Variable + exprs = [If(Variable("is_self"), 0, expr) for expr in exprs] + from sumpy.tools import gather_loopy_source_arguments loopy_knl = lp.make_kernel( [ @@ -251,7 +254,9 @@ class P2PFromCSR(P2PBase): <> d[idim] = \ targets[idim,itgt] - sources[idim,isrc] \ {dup=idim} - """ + """] + [""" + <> is_self = (source_to_target[isrc] == itgt) + """ if self.exclude_self else ""] + [ ] + loopy_insns + [ lp.Assignment(id=None, assignee="pair_result_%d" % i, expression=expr, @@ -286,7 +291,10 @@ class P2PFromCSR(P2PBase): lp.ValueArg("nsources", np.int32), lp.ValueArg("ntargets", np.int32), "...", - ] + gather_loopy_source_arguments(self.kernels), + ] + ( + [lp.GlobalArg("source_to_target", np.int32, shape=("nsources",))] + if self.exclude_self else [] + ) + gather_loopy_source_arguments(self.kernels), name=self.name, assumptions="ntgt_boxes>=1") loopy_knl = lp.fix_parameters( diff --git a/test/test_fmm.py b/test/test_fmm.py index 2e2bfea4803b6a5f5e17a6d05379620adff09855..acfcb2fa50c04311d2831d350b14064d80086c19 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -221,6 +221,76 @@ def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class): pconv_verifier() +def test_sumpy_fmm_exclude_self(ctx_getter): + logging.basicConfig(level=logging.INFO) + + ctx = ctx_getter() + queue = cl.CommandQueue(ctx) + + nsources = 500 + dtype = np.float64 + + from boxtree.tools import ( + make_normal_particle_array as p_normal) + + knl = LaplaceKernel(2) + local_expn_class = VolumeTaylorLocalExpansion + mpole_expn_class = VolumeTaylorMultipoleExpansion + order = 10 + + sources = p_normal(queue, nsources, knl.dim, dtype, seed=15) + + from boxtree import TreeBuilder + tb = TreeBuilder(ctx) + + tree, _ = tb(queue, sources, + max_particles_in_box=30, debug=True) + + from boxtree.traversal import FMMTraversalBuilder + tbuild = FMMTraversalBuilder(ctx) + trav, _ = tbuild(queue, tree, debug=True) + + from pyopencl.clrandom import PhiloxGenerator + rng = PhiloxGenerator(ctx) + weights = rng.uniform(queue, nsources, dtype=np.float64) + + source_to_target = np.arange(tree.nsources, dtype=np.int32) + self_extra_kwargs = {"source_to_target": source_to_target} + + out_kernels = [knl] + + from functools import partial + + from sumpy.fmm import SumpyExpansionWranglerCodeContainer + wcc = SumpyExpansionWranglerCodeContainer( + ctx, + partial(mpole_expn_class, knl), + partial(local_expn_class, knl), + out_kernels, + exclude_self=True) + + wrangler = wcc.get_wrangler(queue, tree, dtype, + fmm_level_to_order=lambda lev: order, + self_extra_kwargs=self_extra_kwargs) + + from boxtree.fmm import drive_fmm + + pot, = drive_fmm(trav, wrangler, weights) + + from sumpy import P2P + p2p = P2P(ctx, out_kernels, exclude_self=True) + evt, (ref_pot,) = p2p(queue, sources, sources, (weights,), + **self_extra_kwargs) + + pot = pot.get() + ref_pot = ref_pot.get() + + rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot) + logger.info("order %d -> relative l2 error: %g" % (order, rel_err)) + + assert np.isclose(rel_err, 0, atol=1e-7) + + # You can test individual routines by typing # $ python test_fmm.py 'test_sumpy_fmm(cl.create_some_context)' diff --git a/test/test_kernels.py b/test/test_kernels.py index 628e2893f3c67a18296badc0fe477dca67fcd4f6..b7b51e7ddc0646b6c50bc97b15dbfd2517700d2b 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -57,7 +57,8 @@ else: faulthandler.enable() -def test_p2p(ctx_getter): +@pytest.mark.parametrize("exclude_self", (True, False)) +def test_p2p(ctx_getter, exclude_self): ctx = ctx_getter() queue = cl.CommandQueue(ctx) @@ -68,26 +69,36 @@ def test_p2p(ctx_getter): lknl = LaplaceKernel(dimensions) knl = P2P(ctx, [lknl, AxisTargetDerivative(0, lknl)], - exclude_self=False) + exclude_self=exclude_self) targets = np.random.rand(dimensions, n) - sources = np.random.rand(dimensions, n) + sources = targets if exclude_self else np.random.rand(dimensions, n) strengths = np.ones(n, dtype=np.float64) + extra_kwargs = {} + + if exclude_self: + extra_kwargs["source_to_target"] = np.arange(n, dtype=np.int32) + evt, (potential, x_derivative) = knl( queue, targets, sources, [strengths], - out_host=True) + out_host=True, **extra_kwargs) potential_ref = np.empty_like(potential) targets = targets.T sources = sources.T for itarg in range(n): - potential_ref[itarg] = np.sum( - strengths - / - np.sum((targets[itarg] - sources)**2, axis=-1)**0.5) + + with np.errstate(divide="ignore"): + invdists = np.sum((targets[itarg] - sources) ** 2, axis=-1) ** -0.5 + + if exclude_self: + assert np.isinf(invdists[itarg]) + invdists[itarg] = 0 + + potential_ref[itarg] = np.sum(strengths * invdists) potential_ref *= 1/(4*np.pi)