diff --git a/setup.py b/setup.py index 60fed845f11e530fcbf3dee08feaa267593ed2f5..9cc8e07a4aca82f5ca122bc0f47f959cf20ee502 100644 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ setup(name="sumpy", install_requires=[ "loo.py>=2013.1beta", - "pytools>=2013.3", + "pytools>=2013.5.6", "pytest>=2.3", # FIXME leave out for now diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 560a980de37da1cdbb157c4a6f11befd147d7c11..b92675dc2238ae19c0a7b217637c98f190365ec1 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -240,6 +240,8 @@ class E2EFromChildren(E2EBase): {id_prefix=write_expn,if=is_src_box_valid,dep=compute_coeff*} """], [ + lp.GlobalArg("target_boxes", None, shape=lp.auto, + offset=lp.auto), lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"), lp.GlobalArg("box_child_ids", None, shape="nchildren, aligned_nboxes"), @@ -323,6 +325,8 @@ class E2EFromParent(E2EBase): {id_prefix=write_expn} """], [ + lp.GlobalArg("target_boxes", None, shape=lp.auto, + offset=lp.auto), lp.GlobalArg("centers", None, shape="dim, naligned_boxes"), lp.ValueArg("naligned_boxes,nboxes", np.int32), lp.GlobalArg("box_parent_ids", None, shape="nboxes"), diff --git a/sumpy/e2p.py b/sumpy/e2p.py index f87b9035b63f6cc3e892bb734f7cb9073121117c..8274ec9c5d9283436fafe97ec5f8321c4756c9f8 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -159,7 +159,7 @@ class E2PFromLocal(E2PBase): def get_optimized_kernel(self): # FIXME knl = self.get_kernel() - #knl = lp.split_iname(knl, "itgt_box", 16, outer_tag="g.0") + knl = lp.tag_inames(knl, dict(itgt_box="g.0")) return knl def __call__(self, queue, **kwargs): diff --git a/sumpy/fmm.py b/sumpy/fmm.py index cf5382f6eb504662346a5ddd11c9b0fc12111372..56f878e53c0035ce6c2204dc3c97f53aef64e8ca 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -247,6 +247,7 @@ class SumpyExpansionWrangler(object): target_boxes=child_boxes, box_parent_ids=self.tree.box_parent_ids, centers=self.tree.box_centers, + **self.extra_kwargs) assert local_exps_res is local_exps diff --git a/sumpy/p2e.py b/sumpy/p2e.py index 80cbd770e5bd983d49132ee74c20871be57037ae..5da14790f0d18dcebed582967cd69c3902ff9b7e 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py @@ -185,7 +185,7 @@ class P2EFromCSR(P2EBase): loopy_knl = lp.make_kernel(self.device, [ "{[itgt_box]: 0<=itgt_box<ntgt_boxes}", - "{[isrc_box]: isrc_box_stop<=isrc_box<isrc_box_start}", + "{[isrc_box]: isrc_box_start<=isrc_box<isrc_box_stop}", "{[isrc,idim]: isrc_start<=isrc<isrc_end and 0<=idim<dim}", ], self.get_looy_instructions() @@ -199,11 +199,11 @@ class P2EFromCSR(P2EBase): <> isrc_start = box_source_starts[src_ibox] <> isrc_end = isrc_start+box_source_counts_nonchild[src_ibox] - <> center[idim] = centers[idim, src_ibox] {id=fetch_center} + <> center[idim] = centers[idim, tgt_ibox] {id=fetch_center} <> a[idim] = center[idim] - sources[idim, isrc] {id=compute_a} <> strength = strengths[isrc] - expansions[src_ibox, ${COEFFIDX}] = \ - sum(isrc, strength*coeff${COEFFIDX}) \ + expansions[tgt_ibox, ${COEFFIDX}] = \ + sum((isrc_box, isrc), strength*coeff${COEFFIDX}) \ {id_prefix=write_expn} """], arguments, diff --git a/test/test_fmm.py b/test/test_fmm.py index e3bb117bec2e7b539a1701addf53b08a921fde3e..96d16d4c28cc3a24e40e9be2944fccddea6a681a 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -41,7 +41,7 @@ else: faulthandler.enable() -def no_test_sumpy_fmm(ctx_getter): +def test_sumpy_fmm(ctx_getter): logging.basicConfig(level=logging.INFO) ctx = ctx_getter() @@ -56,9 +56,16 @@ def no_test_sumpy_fmm(ctx_getter): make_normal_particle_array as p_normal) sources = p_normal(queue, nsources, dim, dtype, seed=15) - targets = ( - p_normal(queue, ntargets, dim, dtype, seed=18) - + np.array([2, 0])) + if 1: + targets = ( + p_normal(queue, ntargets, dim, dtype, seed=18) + + np.array([0.1, 0])) + else: + from sumpy.visualization import FieldPlotter + fp = FieldPlotter(np.array([0.5, 0]), extent=3, npoints=200) + from pytools.obj_array import make_obj_array + targets = make_obj_array( + [fp.points[0], fp.points[1]]) from boxtree import TreeBuilder tb = TreeBuilder(ctx) @@ -66,9 +73,24 @@ def no_test_sumpy_fmm(ctx_getter): tree, _ = tb(queue, sources, targets=targets, max_particles_in_box=30, debug=True) + from boxtree.traversal import FMMTraversalBuilder + tbuild = FMMTraversalBuilder(ctx) + trav, _ = tbuild(queue, tree, debug=True) + + # {{{ plot tree + if 0: + host_tree = tree.get() + host_trav = trav.get() + + if 1: + print "src_box", host_tree.find_box_nr_for_source(403) + print "tgt_box", host_tree.find_box_nr_for_target(28) + print list(host_trav.target_or_target_parent_boxes).index(37) + print host_trav.get_box_list("sep_bigger", 22) + from boxtree.visualization import TreePlotter - plotter = TreePlotter(tree.get()) + plotter = TreePlotter(host_tree) plotter.draw_tree(fill=False, edgecolor="black", zorder=10) plotter.set_bounding_box() plotter.draw_box_numbers() @@ -76,20 +98,11 @@ def no_test_sumpy_fmm(ctx_getter): import matplotlib.pyplot as pt pt.show() - from boxtree.traversal import FMMTraversalBuilder - tbuild = FMMTraversalBuilder(ctx) - trav, _ = tbuild(queue, tree, debug=True) + # }}} - trav = trav.get() - - if 1: - from pyopencl.clrandom import RanluxGenerator - rng = RanluxGenerator(queue, seed=20) - weights = rng.uniform(queue, nsources, dtype=np.float64) - else: - weights = np.zeros(nsources) - weights[0] = 1 - weights = cl.array.to_device(queue, weights) + from pyopencl.clrandom import RanluxGenerator + rng = RanluxGenerator(queue, seed=20) + weights = rng.uniform(queue, nsources, dtype=np.float64) logger.info("computing direct (reference) result") @@ -97,49 +110,39 @@ def no_test_sumpy_fmm(ctx_getter): from sumpy.expansion.local import VolumeTaylorLocalExpansion from sumpy.kernel import LaplaceKernel - order = 1 + from pytools.convergence import PConvergenceVerifier + + pconv_verifier = PConvergenceVerifier() knl = LaplaceKernel(dim) - mpole_expn = VolumeTaylorMultipoleExpansion(knl, order) - local_expn = VolumeTaylorLocalExpansion(knl, order) - out_kernels = [knl] - from sumpy.fmm import SumpyExpansionWranglerCodeContainer - wcc = SumpyExpansionWranglerCodeContainer( - ctx, tree, mpole_expn, local_expn, out_kernels) - wrangler = wcc.get_wrangler(queue, np.float64) + for order in [1, 2, 3]: + mpole_expn = VolumeTaylorMultipoleExpansion(knl, order) + local_expn = VolumeTaylorLocalExpansion(knl, order) + out_kernels = [knl] - from boxtree.fmm import drive_fmm - pot, = drive_fmm(trav, wrangler, weights) - #print la.norm(pot.get()) - #1/0 + from sumpy.fmm import SumpyExpansionWranglerCodeContainer + wcc = SumpyExpansionWranglerCodeContainer( + ctx, tree, mpole_expn, local_expn, out_kernels) + wrangler = wcc.get_wrangler(queue, np.float64) - if 0: - from sumpy.tools import build_matrix - - def matvec(x): - pot, = drive_fmm(trav, wrangler, cl.array.to_device(queue, x)) - return pot.get() - - mat = build_matrix(matvec, dtype=np.float64, shape=(ntargets, nsources)) - if 0: - amat = np.abs(mat) - sum_over_rows = np.sum(amat, axis=0) - print np.where(sum_over_rows == 0) - else: - import matplotlib.pyplot as pt - pt.imshow(mat) - pt.show() - - from sumpy import P2P - p2p = P2P(ctx, out_kernels, exclude_self=False) - evt, (ref_pot,) = p2p(queue, targets, sources, (weights,)) - - pot = pot.get() - ref_pot = ref_pot.get() - - rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot) - logger.info("relative l2 error: %g" % rel_err) - assert rel_err < 1e-5 + from boxtree.fmm import drive_fmm + + pot, = drive_fmm(trav, wrangler, weights) + + from sumpy import P2P + p2p = P2P(ctx, out_kernels, exclude_self=False) + evt, (ref_pot,) = p2p(queue, targets, sources, (weights,)) + + pot = pot.get() + ref_pot = ref_pot.get() + + rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot) + logger.info("order %d -> relative l2 error: %g" % (order, rel_err)) + + pconv_verifier.add_data_point(order, rel_err) + + print pconv_verifier + pconv_verifier() # You can test individual routines by typing diff --git a/test/test_kernels.py b/test/test_kernels.py index 35dad42ba9664b7f48a0dd2a88c3ea65d7402718..ef660cffad79aed1ecf4762c39957ac7bf5ba59b 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -35,6 +35,7 @@ from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansion from sumpy.expansion.local import VolumeTaylorLocalExpansion, H2DLocalExpansion from sumpy.kernel import (LaplaceKernel, HelmholtzKernel, AxisTargetDerivative, DirectionalSourceDerivative) +from pytools.convergence import PConvergenceVerifier import logging logger = logging.getLogger(__name__) @@ -278,52 +279,6 @@ def test_p2e2p(ctx_getter, knl, expn_class, order, with_source_derivative): assert eoc_rec_grad_x.order_estimate() > tgt_order_grad - grad_slack -class PConvergenceVerifier(object): - def __init__(self): - self.orders = [] - self.errors = [] - - def add_data_point(self, order, error): - self.orders.append(order) - self.errors.append(error) - - def __str__(self): - from pytools import Table - tbl = Table() - tbl.add_row(("p", "error")) - - for p, err in zip(self.orders, self.errors): - tbl.add_row((str(p), str(err))) - - return str(tbl) - - def __call__(self): - orders = np.array(self.orders, np.float64) - errors = np.abs(np.array(self.errors, np.float64)) - - rel_change = np.diff(1e-20 + np.log10(errors)) / np.diff(orders) - - assert (rel_change < -0.2).all() - - -def test_p_convergence_verifier(): - pconv_verifier = PConvergenceVerifier() - for order in [2, 3, 4, 5]: - pconv_verifier.add_data_point(order, 0.1**order) - pconv_verifier() - - pconv_verifier = PConvergenceVerifier() - for order in [2, 3, 4, 5]: - pconv_verifier.add_data_point(order, 0.5**order) - pconv_verifier() - - pconv_verifier = PConvergenceVerifier() - for order in [2, 3, 4, 5]: - pconv_verifier.add_data_point(order, 2) - with pytest.raises(AssertionError): - pconv_verifier() - - @pytest.mark.parametrize("knl", [ LaplaceKernel(2), HelmholtzKernel(2)