From 8736b6c30b5be931bc939f321fac8ba4a944d729 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 21 Apr 2017 19:28:14 -0500
Subject: [PATCH] P2P: Add support for exclude_self in the FMM.

This changes the P2P stage to be aware of self interactions in the FMM
and to exclude them, if desired. Self interactions are specified by
passing an extra array called *source_to_target* which matches source
numbers to target numbers.
---
 sumpy/fmm.py         | 31 +++++++++++++++-----
 sumpy/p2p.py         | 36 ++++++++++++++---------
 test/test_fmm.py     | 70 ++++++++++++++++++++++++++++++++++++++++++++
 test/test_kernels.py | 27 ++++++++++++-----
 4 files changed, 135 insertions(+), 29 deletions(-)

diff --git a/sumpy/fmm.py b/sumpy/fmm.py
index 3d313f25..17b03403 100644
--- a/sumpy/fmm.py
+++ b/sumpy/fmm.py
@@ -55,16 +55,20 @@ class SumpyExpansionWranglerCodeContainer(object):
 
     def __init__(self, cl_context,
             multipole_expansion_factory,
-            local_expansion_factory, out_kernels):
+            local_expansion_factory,
+            out_kernels, exclude_self=False):
         """
         :arg multipole_expansion_factory: a callable of a single argument (order)
             that returns a multipole expansion.
         :arg local_expansion_factory: a callable of a single argument (order)
             that returns a local expansion.
+        :arg out_kernels: a list of output kernels
+        :arg exclude_self: whether the self contribution should be excluded
         """
         self.multipole_expansion_factory = multipole_expansion_factory
         self.local_expansion_factory = local_expansion_factory
         self.out_kernels = out_kernels
+        self.exclude_self = exclude_self
 
         self.cl_context = cl_context
 
@@ -118,14 +122,15 @@ class SumpyExpansionWranglerCodeContainer(object):
 
     @memoize_method
     def p2p(self):
-        # FIXME figure out what to do about exclude_self
-        return P2PFromCSR(self.cl_context, self.out_kernels, exclude_self=False)
+        return P2PFromCSR(self.cl_context, self.out_kernels,
+                          exclude_self=self.exclude_self)
 
     def get_wrangler(self, queue, tree, dtype, fmm_level_to_order,
             source_extra_kwargs={},
-            kernel_extra_kwargs=None):
+            kernel_extra_kwargs=None,
+            self_extra_kwargs=None):
         return SumpyExpansionWrangler(self, queue, tree, dtype, fmm_level_to_order,
-                source_extra_kwargs, kernel_extra_kwargs)
+                source_extra_kwargs, kernel_extra_kwargs, self_extra_kwargs)
 
 # }}}
 
@@ -155,11 +160,18 @@ class SumpyExpansionWrangler(object):
 
         Keyword arguments to be passed to interactions that involve
         expansions, but not source particles.
+
+    .. attribute:: self_extra_kwargs
+
+        Keyword arguments to be passed for handling
+        self interactions (source and target particles are the same),
+        provided special handling is needed
     """
 
-    def __init__(self, code_container,  queue, tree, dtype, fmm_level_to_order,
+    def __init__(self, code_container, queue, tree, dtype, fmm_level_to_order,
             source_extra_kwargs,
-            kernel_extra_kwargs=None):
+            kernel_extra_kwargs=None,
+            self_extra_kwargs=None):
         self.code = code_container
         self.queue = queue
         self.tree = tree
@@ -174,8 +186,12 @@ class SumpyExpansionWrangler(object):
         if kernel_extra_kwargs is None:
             kernel_extra_kwargs = {}
 
+        if self_extra_kwargs is None:
+            self_extra_kwargs = {}
+
         self.source_extra_kwargs = source_extra_kwargs
         self.kernel_extra_kwargs = kernel_extra_kwargs
+        self.self_extra_kwargs = self_extra_kwargs
 
         self.extra_kwargs = source_extra_kwargs.copy()
         self.extra_kwargs.update(self.kernel_extra_kwargs)
@@ -373,6 +389,7 @@ class SumpyExpansionWrangler(object):
         pot = self.potential_zeros()
 
         kwargs = self.extra_kwargs.copy()
+        kwargs.update(self.self_extra_kwargs)
         kwargs.update(self.box_source_list_kwargs())
         kwargs.update(self.box_target_list_kwargs())
 
diff --git a/sumpy/p2p.py b/sumpy/p2p.py
index 3071a3b8..b0f15abb 100644
--- a/sumpy/p2p.py
+++ b/sumpy/p2p.py
@@ -49,7 +49,7 @@ Particle-to-particle
 # {{{ p2p base class
 
 class P2PBase(KernelComputation, KernelCacheWrapper):
-    def __init__(self, ctx, kernels,  exclude_self, strength_usage=None,
+    def __init__(self, ctx, kernels, exclude_self, strength_usage=None,
             value_dtypes=None,
             options=[], name=None, device=None):
         """
@@ -117,12 +117,8 @@ class P2P(P2PBase):
                 for i, name in enumerate(result_names)]
 
         if self.exclude_self:
-            from pymbolic.primitives import If, ComparisonOperator, Variable
-            exprs = [
-                    If(
-                        ComparisonOperator(Variable("isrc"), "!=", Variable("itgt")),
-                        expr, 0)
-                    for expr in exprs]
+            from pymbolic.primitives import If, Variable
+            exprs = [If(Variable("is_self"), 0, expr) for expr in exprs]
 
         from sumpy.tools import gather_loopy_source_arguments
         loopy_knl = lp.make_kernel(
@@ -133,8 +129,10 @@ class P2P(P2PBase):
                 for itgt
                     for isrc
                         """] + loopy_insns + ["""
-                        <> d[idim] = targets[idim,itgt] - sources[idim,isrc] \
-                        """]+[
+                        <> d[idim] = targets[idim,itgt] - sources[idim,isrc]
+                        """] + ["""
+                        <> is_self = (source_to_target[isrc] == itgt)
+                        """ if self.exclude_self else ""] + [
                         lp.Assignment(id=None,
                             assignee="pair_result_%d" % i, expression=expr,
                             temp_var_type=lp.auto)
@@ -159,7 +157,10 @@ class P2P(P2PBase):
                     lp.GlobalArg("strength", None, shape="nstrengths,nsources"),
                     lp.GlobalArg("result", None,
                         shape="nresults,ntargets", dim_tags="sep,C")
-                ] + gather_loopy_source_arguments(self.kernels),
+                ] + (
+                    [lp.GlobalArg("source_to_target", np.int32, shape=("nsources",))]
+                    if self.exclude_self else []
+                ) + gather_loopy_source_arguments(self.kernels),
                 name=self.name,
                 assumptions="nsources>=1 and ntargets>=1")
 
@@ -209,8 +210,6 @@ class P2P(P2PBase):
 class P2PFromCSR(P2PBase):
     default_name = "p2p_from_csr"
 
-    # FIXME: exclude_self ...?
-
     def get_kernel(self):
         loopy_insns, result_names = self.get_loopy_insns_and_result_names()
 
@@ -220,6 +219,10 @@ class P2PFromCSR(P2PBase):
                 * var("strength").index((self.strength_usage[i], var("isrc")))
                 for i, name in enumerate(result_names)]
 
+        if self.exclude_self:
+            from pymbolic.primitives import If, Variable
+            exprs = [If(Variable("is_self"), 0, expr) for expr in exprs]
+
         from sumpy.tools import gather_loopy_source_arguments
         loopy_knl = lp.make_kernel(
             [
@@ -251,7 +254,9 @@ class P2PFromCSR(P2PBase):
                                 <> d[idim] = \
                                         targets[idim,itgt] - sources[idim,isrc] \
                                         {dup=idim}
-                                """
+                                """] + ["""
+                                <> is_self = (source_to_target[isrc] == itgt)
+                                """ if self.exclude_self else ""] + [
                                 ] + loopy_insns + [
                                 lp.Assignment(id=None,
                                     assignee="pair_result_%d" % i, expression=expr,
@@ -286,7 +291,10 @@ class P2PFromCSR(P2PBase):
                 lp.ValueArg("nsources", np.int32),
                 lp.ValueArg("ntargets", np.int32),
                 "...",
-            ] + gather_loopy_source_arguments(self.kernels),
+            ] + (
+                [lp.GlobalArg("source_to_target", np.int32, shape=("nsources",))]
+                if self.exclude_self else []
+            ) + gather_loopy_source_arguments(self.kernels),
             name=self.name, assumptions="ntgt_boxes>=1")
 
         loopy_knl = lp.fix_parameters(
diff --git a/test/test_fmm.py b/test/test_fmm.py
index 2e2bfea4..acfcb2fa 100644
--- a/test/test_fmm.py
+++ b/test/test_fmm.py
@@ -221,6 +221,76 @@ def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class):
     pconv_verifier()
 
 
+def test_sumpy_fmm_exclude_self(ctx_getter):
+    logging.basicConfig(level=logging.INFO)
+
+    ctx = ctx_getter()
+    queue = cl.CommandQueue(ctx)
+
+    nsources = 500
+    dtype = np.float64
+
+    from boxtree.tools import (
+            make_normal_particle_array as p_normal)
+
+    knl = LaplaceKernel(2)
+    local_expn_class = VolumeTaylorLocalExpansion
+    mpole_expn_class = VolumeTaylorMultipoleExpansion
+    order = 10
+
+    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)
+
+    from boxtree import TreeBuilder
+    tb = TreeBuilder(ctx)
+
+    tree, _ = tb(queue, sources,
+            max_particles_in_box=30, debug=True)
+
+    from boxtree.traversal import FMMTraversalBuilder
+    tbuild = FMMTraversalBuilder(ctx)
+    trav, _ = tbuild(queue, tree, debug=True)
+
+    from pyopencl.clrandom import PhiloxGenerator
+    rng = PhiloxGenerator(ctx)
+    weights = rng.uniform(queue, nsources, dtype=np.float64)
+
+    source_to_target = np.arange(tree.nsources, dtype=np.int32)
+    self_extra_kwargs = {"source_to_target": source_to_target}
+
+    out_kernels = [knl]
+
+    from functools import partial
+
+    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
+    wcc = SumpyExpansionWranglerCodeContainer(
+            ctx,
+            partial(mpole_expn_class, knl),
+            partial(local_expn_class, knl),
+            out_kernels,
+            exclude_self=True)
+
+    wrangler = wcc.get_wrangler(queue, tree, dtype,
+            fmm_level_to_order=lambda lev: order,
+            self_extra_kwargs=self_extra_kwargs)
+
+    from boxtree.fmm import drive_fmm
+
+    pot, = drive_fmm(trav, wrangler, weights)
+
+    from sumpy import P2P
+    p2p = P2P(ctx, out_kernels, exclude_self=True)
+    evt, (ref_pot,) = p2p(queue, sources, sources, (weights,),
+            **self_extra_kwargs)
+
+    pot = pot.get()
+    ref_pot = ref_pot.get()
+
+    rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot)
+    logger.info("order %d -> relative l2 error: %g" % (order, rel_err))
+
+    assert np.isclose(rel_err, 0, atol=1e-7)
+
+
 # You can test individual routines by typing
 # $ python test_fmm.py 'test_sumpy_fmm(cl.create_some_context)'
 
diff --git a/test/test_kernels.py b/test/test_kernels.py
index 628e2893..b7b51e7d 100644
--- a/test/test_kernels.py
+++ b/test/test_kernels.py
@@ -57,7 +57,8 @@ else:
     faulthandler.enable()
 
 
-def test_p2p(ctx_getter):
+@pytest.mark.parametrize("exclude_self", (True, False))
+def test_p2p(ctx_getter, exclude_self):
     ctx = ctx_getter()
     queue = cl.CommandQueue(ctx)
 
@@ -68,26 +69,36 @@ def test_p2p(ctx_getter):
     lknl = LaplaceKernel(dimensions)
     knl = P2P(ctx,
             [lknl, AxisTargetDerivative(0, lknl)],
-            exclude_self=False)
+            exclude_self=exclude_self)
 
     targets = np.random.rand(dimensions, n)
-    sources = np.random.rand(dimensions, n)
+    sources = targets if exclude_self else np.random.rand(dimensions, n)
 
     strengths = np.ones(n, dtype=np.float64)
 
+    extra_kwargs = {}
+
+    if exclude_self:
+        extra_kwargs["source_to_target"] = np.arange(n, dtype=np.int32)
+
     evt, (potential, x_derivative) = knl(
             queue, targets, sources, [strengths],
-            out_host=True)
+            out_host=True, **extra_kwargs)
 
     potential_ref = np.empty_like(potential)
 
     targets = targets.T
     sources = sources.T
     for itarg in range(n):
-        potential_ref[itarg] = np.sum(
-                strengths
-                /
-                np.sum((targets[itarg] - sources)**2, axis=-1)**0.5)
+
+        with np.errstate(divide="ignore"):
+            invdists = np.sum((targets[itarg] - sources) ** 2, axis=-1) ** -0.5
+
+        if exclude_self:
+            assert np.isinf(invdists[itarg])
+            invdists[itarg] = 0
+
+        potential_ref[itarg] = np.sum(strengths * invdists)
 
     potential_ref *= 1/(4*np.pi)
 
-- 
GitLab