From 96b507b3a88e25e37df87089b682377c9595b1a8 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Sun, 28 Jul 2013 22:38:29 -0400
Subject: [PATCH] Make Taylor FMM work

---
 setup.py             |   2 +-
 sumpy/e2e.py         |   4 ++
 sumpy/e2p.py         |   2 +-
 sumpy/fmm.py         |   1 +
 sumpy/p2e.py         |   8 +--
 test/test_fmm.py     | 117 ++++++++++++++++++++++---------------------
 test/test_kernels.py |  47 +----------------
 7 files changed, 72 insertions(+), 109 deletions(-)

diff --git a/setup.py b/setup.py
index 60fed845..9cc8e07a 100644
--- a/setup.py
+++ b/setup.py
@@ -38,7 +38,7 @@ setup(name="sumpy",
 
       install_requires=[
           "loo.py>=2013.1beta",
-          "pytools>=2013.3",
+          "pytools>=2013.5.6",
           "pytest>=2.3",
 
           # FIXME leave out for now
diff --git a/sumpy/e2e.py b/sumpy/e2e.py
index 560a980d..b92675dc 100644
--- a/sumpy/e2e.py
+++ b/sumpy/e2e.py
@@ -240,6 +240,8 @@ class E2EFromChildren(E2EBase):
                         {id_prefix=write_expn,if=is_src_box_valid,dep=compute_coeff*}
                     """],
                 [
+                    lp.GlobalArg("target_boxes", None, shape=lp.auto,
+                        offset=lp.auto),
                     lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"),
                     lp.GlobalArg("box_child_ids", None,
                         shape="nchildren, aligned_nboxes"),
@@ -323,6 +325,8 @@ class E2EFromParent(E2EBase):
                         {id_prefix=write_expn}
                     """],
                 [
+                    lp.GlobalArg("target_boxes", None, shape=lp.auto,
+                        offset=lp.auto),
                     lp.GlobalArg("centers", None, shape="dim, naligned_boxes"),
                     lp.ValueArg("naligned_boxes,nboxes", np.int32),
                     lp.GlobalArg("box_parent_ids", None, shape="nboxes"),
diff --git a/sumpy/e2p.py b/sumpy/e2p.py
index f87b9035..8274ec9c 100644
--- a/sumpy/e2p.py
+++ b/sumpy/e2p.py
@@ -159,7 +159,7 @@ class E2PFromLocal(E2PBase):
     def get_optimized_kernel(self):
         # FIXME
         knl = self.get_kernel()
-        #knl = lp.split_iname(knl, "itgt_box", 16, outer_tag="g.0")
+        knl = lp.tag_inames(knl, dict(itgt_box="g.0"))
         return knl
 
     def __call__(self, queue, **kwargs):
diff --git a/sumpy/fmm.py b/sumpy/fmm.py
index cf5382f6..56f878e5 100644
--- a/sumpy/fmm.py
+++ b/sumpy/fmm.py
@@ -247,6 +247,7 @@ class SumpyExpansionWrangler(object):
                 target_boxes=child_boxes,
                 box_parent_ids=self.tree.box_parent_ids,
                 centers=self.tree.box_centers,
+
                 **self.extra_kwargs)
 
         assert local_exps_res is local_exps
diff --git a/sumpy/p2e.py b/sumpy/p2e.py
index 80cbd770..5da14790 100644
--- a/sumpy/p2e.py
+++ b/sumpy/p2e.py
@@ -185,7 +185,7 @@ class P2EFromCSR(P2EBase):
         loopy_knl = lp.make_kernel(self.device,
                 [
                     "{[itgt_box]: 0<=itgt_box<ntgt_boxes}",
-                    "{[isrc_box]: isrc_box_stop<=isrc_box<isrc_box_start}",
+                    "{[isrc_box]: isrc_box_start<=isrc_box<isrc_box_stop}",
                     "{[isrc,idim]: isrc_start<=isrc<isrc_end and 0<=idim<dim}",
                     ],
                 self.get_looy_instructions()
@@ -199,11 +199,11 @@ class P2EFromCSR(P2EBase):
                     <> isrc_start = box_source_starts[src_ibox]
                     <> isrc_end = isrc_start+box_source_counts_nonchild[src_ibox]
 
-                    <> center[idim] = centers[idim, src_ibox] {id=fetch_center}
+                    <> center[idim] = centers[idim, tgt_ibox] {id=fetch_center}
                     <> a[idim] = center[idim] - sources[idim, isrc] {id=compute_a}
                     <> strength = strengths[isrc]
-                    expansions[src_ibox, ${COEFFIDX}] = \
-                            sum(isrc, strength*coeff${COEFFIDX}) \
+                    expansions[tgt_ibox, ${COEFFIDX}] = \
+                            sum((isrc_box, isrc), strength*coeff${COEFFIDX}) \
                             {id_prefix=write_expn}
                     """],
                 arguments,
diff --git a/test/test_fmm.py b/test/test_fmm.py
index e3bb117b..96d16d4c 100644
--- a/test/test_fmm.py
+++ b/test/test_fmm.py
@@ -41,7 +41,7 @@ else:
     faulthandler.enable()
 
 
-def no_test_sumpy_fmm(ctx_getter):
+def test_sumpy_fmm(ctx_getter):
     logging.basicConfig(level=logging.INFO)
 
     ctx = ctx_getter()
@@ -56,9 +56,16 @@ def no_test_sumpy_fmm(ctx_getter):
             make_normal_particle_array as p_normal)
 
     sources = p_normal(queue, nsources, dim, dtype, seed=15)
-    targets = (
-            p_normal(queue, ntargets, dim, dtype, seed=18)
-            + np.array([2, 0]))
+    if 1:
+        targets = (
+                p_normal(queue, ntargets, dim, dtype, seed=18)
+                + np.array([0.1, 0]))
+    else:
+        from sumpy.visualization import FieldPlotter
+        fp = FieldPlotter(np.array([0.5, 0]), extent=3, npoints=200)
+        from pytools.obj_array import make_obj_array
+        targets = make_obj_array(
+                [fp.points[0], fp.points[1]])
 
     from boxtree import TreeBuilder
     tb = TreeBuilder(ctx)
@@ -66,9 +73,24 @@ def no_test_sumpy_fmm(ctx_getter):
     tree, _ = tb(queue, sources, targets=targets,
             max_particles_in_box=30, debug=True)
 
+    from boxtree.traversal import FMMTraversalBuilder
+    tbuild = FMMTraversalBuilder(ctx)
+    trav, _ = tbuild(queue, tree, debug=True)
+
+    # {{{ plot tree
+
     if 0:
+        host_tree = tree.get()
+        host_trav = trav.get()
+
+        if 1:
+            print "src_box", host_tree.find_box_nr_for_source(403)
+            print "tgt_box", host_tree.find_box_nr_for_target(28)
+            print list(host_trav.target_or_target_parent_boxes).index(37)
+            print host_trav.get_box_list("sep_bigger", 22)
+
         from boxtree.visualization import TreePlotter
-        plotter = TreePlotter(tree.get())
+        plotter = TreePlotter(host_tree)
         plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
         plotter.set_bounding_box()
         plotter.draw_box_numbers()
@@ -76,20 +98,11 @@ def no_test_sumpy_fmm(ctx_getter):
         import matplotlib.pyplot as pt
         pt.show()
 
-    from boxtree.traversal import FMMTraversalBuilder
-    tbuild = FMMTraversalBuilder(ctx)
-    trav, _ = tbuild(queue, tree, debug=True)
+    # }}}
 
-    trav = trav.get()
-
-    if 1:
-        from pyopencl.clrandom import RanluxGenerator
-        rng = RanluxGenerator(queue, seed=20)
-        weights = rng.uniform(queue, nsources, dtype=np.float64)
-    else:
-        weights = np.zeros(nsources)
-        weights[0] = 1
-        weights = cl.array.to_device(queue, weights)
+    from pyopencl.clrandom import RanluxGenerator
+    rng = RanluxGenerator(queue, seed=20)
+    weights = rng.uniform(queue, nsources, dtype=np.float64)
 
     logger.info("computing direct (reference) result")
 
@@ -97,49 +110,39 @@ def no_test_sumpy_fmm(ctx_getter):
     from sumpy.expansion.local import VolumeTaylorLocalExpansion
     from sumpy.kernel import LaplaceKernel
 
-    order = 1
+    from pytools.convergence import PConvergenceVerifier
+
+    pconv_verifier = PConvergenceVerifier()
     knl = LaplaceKernel(dim)
-    mpole_expn = VolumeTaylorMultipoleExpansion(knl, order)
-    local_expn = VolumeTaylorLocalExpansion(knl, order)
-    out_kernels = [knl]
 
-    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
-    wcc = SumpyExpansionWranglerCodeContainer(
-            ctx, tree, mpole_expn, local_expn, out_kernels)
-    wrangler = wcc.get_wrangler(queue, np.float64)
+    for order in [1, 2, 3]:
+        mpole_expn = VolumeTaylorMultipoleExpansion(knl, order)
+        local_expn = VolumeTaylorLocalExpansion(knl, order)
+        out_kernels = [knl]
 
-    from boxtree.fmm import drive_fmm
-    pot, = drive_fmm(trav, wrangler, weights)
-    #print la.norm(pot.get())
-    #1/0
+        from sumpy.fmm import SumpyExpansionWranglerCodeContainer
+        wcc = SumpyExpansionWranglerCodeContainer(
+                ctx, tree, mpole_expn, local_expn, out_kernels)
+        wrangler = wcc.get_wrangler(queue, np.float64)
 
-    if 0:
-        from sumpy.tools import build_matrix
-
-        def matvec(x):
-            pot, = drive_fmm(trav, wrangler, cl.array.to_device(queue, x))
-            return pot.get()
-
-        mat = build_matrix(matvec, dtype=np.float64, shape=(ntargets, nsources))
-        if 0:
-            amat = np.abs(mat)
-            sum_over_rows = np.sum(amat, axis=0)
-            print np.where(sum_over_rows == 0)
-        else:
-            import matplotlib.pyplot as pt
-            pt.imshow(mat)
-            pt.show()
-
-    from sumpy import P2P
-    p2p = P2P(ctx, out_kernels, exclude_self=False)
-    evt, (ref_pot,) = p2p(queue, targets, sources, (weights,))
-
-    pot = pot.get()
-    ref_pot = ref_pot.get()
-
-    rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot)
-    logger.info("relative l2 error: %g" % rel_err)
-    assert rel_err < 1e-5
+        from boxtree.fmm import drive_fmm
+
+        pot, = drive_fmm(trav, wrangler, weights)
+
+        from sumpy import P2P
+        p2p = P2P(ctx, out_kernels, exclude_self=False)
+        evt, (ref_pot,) = p2p(queue, targets, sources, (weights,))
+
+        pot = pot.get()
+        ref_pot = ref_pot.get()
+
+        rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot)
+        logger.info("order %d -> relative l2 error: %g" % (order, rel_err))
+
+        pconv_verifier.add_data_point(order, rel_err)
+
+    print pconv_verifier
+    pconv_verifier()
 
 
 # You can test individual routines by typing
diff --git a/test/test_kernels.py b/test/test_kernels.py
index 35dad42b..ef660cff 100644
--- a/test/test_kernels.py
+++ b/test/test_kernels.py
@@ -35,6 +35,7 @@ from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansion
 from sumpy.expansion.local import VolumeTaylorLocalExpansion, H2DLocalExpansion
 from sumpy.kernel import (LaplaceKernel, HelmholtzKernel, AxisTargetDerivative,
         DirectionalSourceDerivative)
+from pytools.convergence import PConvergenceVerifier
 
 import logging
 logger = logging.getLogger(__name__)
@@ -278,52 +279,6 @@ def test_p2e2p(ctx_getter, knl, expn_class, order, with_source_derivative):
     assert eoc_rec_grad_x.order_estimate() > tgt_order_grad - grad_slack
 
 
-class PConvergenceVerifier(object):
-    def __init__(self):
-        self.orders = []
-        self.errors = []
-
-    def add_data_point(self, order, error):
-        self.orders.append(order)
-        self.errors.append(error)
-
-    def __str__(self):
-        from pytools import Table
-        tbl = Table()
-        tbl.add_row(("p", "error"))
-
-        for p, err in zip(self.orders, self.errors):
-            tbl.add_row((str(p), str(err)))
-
-        return str(tbl)
-
-    def __call__(self):
-        orders = np.array(self.orders, np.float64)
-        errors = np.abs(np.array(self.errors, np.float64))
-
-        rel_change = np.diff(1e-20 + np.log10(errors)) / np.diff(orders)
-
-        assert (rel_change < -0.2).all()
-
-
-def test_p_convergence_verifier():
-    pconv_verifier = PConvergenceVerifier()
-    for order in [2, 3, 4, 5]:
-        pconv_verifier.add_data_point(order, 0.1**order)
-    pconv_verifier()
-
-    pconv_verifier = PConvergenceVerifier()
-    for order in [2, 3, 4, 5]:
-        pconv_verifier.add_data_point(order, 0.5**order)
-    pconv_verifier()
-
-    pconv_verifier = PConvergenceVerifier()
-    for order in [2, 3, 4, 5]:
-        pconv_verifier.add_data_point(order, 2)
-    with pytest.raises(AssertionError):
-        pconv_verifier()
-
-
 @pytest.mark.parametrize("knl", [
     LaplaceKernel(2),
     HelmholtzKernel(2)
-- 
GitLab