diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 61329a140f6b2a5468d2a1490bf60de5a2f8c91c..560a980de37da1cdbb157c4a6f11befd147d7c11 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -132,14 +132,18 @@ class E2EFromCSR(E2EBase): <> isrc_start = src_box_starts[itgt_box] <> isrc_stop = src_box_starts[itgt_box+1] - <> src_ibox = src_box_lists[isrc_box] + <> src_ibox = src_box_lists[isrc_box] \ + {id=read_src_ibox} <> src_center[idim] = centers[idim, src_ibox] \ {id=fetch_src_center} <> d[idim] = tgt_center[idim] - src_center[idim] <> src_coeff${SRC_COEFFIDX} = \ - src_expansions[src_ibox, ${SRC_COEFFIDX}] + src_expansions[src_ibox, ${SRC_COEFFIDX}] \ + {dep=read_src_ibox} + tgt_expansions[tgt_ibox, ${TGT_COEFFIDX}] = \ - coeff${TGT_COEFFIDX} {id_prefix=write_expn} + sum(isrc_box, coeff${TGT_COEFFIDX}) \ + {id_prefix=write_expn} """], [ lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"), @@ -201,8 +205,9 @@ class E2EFromChildren(E2EBase): # (same for itgt_box, tgt_ibox) loopy_insns = [ - insn.copy(predicates=insn.predicates - | frozenset(["is_src_box_valid"])) + insn.copy( + predicates=insn.predicates | frozenset(["is_src_box_valid"]), + id=lp.UniqueName("compute_coeff")) for insn in self.get_translation_loopy_insns()] from sumpy.tools import gather_arguments @@ -218,7 +223,8 @@ class E2EFromChildren(E2EBase): <> tgt_center[idim] = centers[idim, tgt_ibox] \ {id=fetch_tgt_center} - <> src_ibox = box_child_ids[isrc_box,tgt_ibox] + <> src_ibox = box_child_ids[isrc_box,tgt_ibox] \ + {id=read_src_ibox} <> is_src_box_valid = src_ibox != 0 <> src_center[idim] = centers[idim, src_ibox] \ @@ -228,10 +234,10 @@ class E2EFromChildren(E2EBase): <> src_coeff${COEFFIDX} = \ expansions[src_ibox, ${COEFFIDX}] \ - {if=is_src_box_valid} + {if=is_src_box_valid,dep=read_src_ibox} expansions[tgt_ibox, ${COEFFIDX}] = \ expansions[tgt_ibox, ${COEFFIDX}] + coeff${COEFFIDX} \ - {id_prefix=write_expn,if=is_src_box_valid,dep=} + {id_prefix=write_expn,if=is_src_box_valid,dep=compute_coeff*} """], [ lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"), @@ -303,13 +309,15 @@ class E2EFromParent(E2EBase): <> tgt_center[idim] = centers[idim, tgt_ibox] \ {id=fetch_tgt_center} - <> src_ibox = box_parent_ids[tgt_ibox] + <> src_ibox = box_parent_ids[tgt_ibox] \ + {id=read_src_ibox} <> src_center[idim] = centers[idim, src_ibox] \ {id=fetch_src_center} <> d[idim] = tgt_center[idim] - src_center[idim] <> src_coeff${COEFFIDX} = \ - expansions[src_ibox, ${COEFFIDX}] + expansions[src_ibox, ${COEFFIDX}] \ + {dep=read_src_ibox} expansions[tgt_ibox, ${COEFFIDX}] = \ expansions[tgt_ibox, ${COEFFIDX}] + coeff${COEFFIDX} \ {id_prefix=write_expn} diff --git a/sumpy/e2p.py b/sumpy/e2p.py index 1dea621c89815d39fee4a2625e22c2d4de44a348..f87b9035b63f6cc3e892bb734f7cb9073121117c 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -250,7 +250,7 @@ class E2PFromCSR(E2PBase): def get_optimized_kernel(self): # FIXME knl = self.get_kernel() - #knl = lp.split_iname(knl, "itgt_box", 16, outer_tag="g.0") + knl = lp.tag_inames(knl, dict(itgt_box="g.0")) return knl def __call__(self, queue, **kwargs): diff --git a/sumpy/fmm.py b/sumpy/fmm.py index 2b2f3d2fb96b4271a988a9cf29bf451b4c89547a..cf5382f6eb504662346a5ddd11c9b0fc12111372 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -148,6 +148,7 @@ class SumpyExpansionWrangler(object): target_boxes=parent_boxes, box_child_ids=self.tree.box_child_ids, centers=self.tree.box_centers, + **self.extra_kwargs) assert mpoles_res is mpoles diff --git a/sumpy/tools.py b/sumpy/tools.py index df6dbf426ae1f597aa763f74030cf86ebd125d32..9b94dc4d94939dc36a4ce40eac8463d1f7f92f35 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -55,17 +55,23 @@ def mi_derivative(expr, vector, mi): # }}} -def build_matrix(op, dtype=None): +def build_matrix(op, dtype=None, shape=None): dtype = dtype or op.dtype from pytools import ProgressBar - rows, cols = op.shape + shape = shape or op.shape + rows, cols = shape pb = ProgressBar("matrix", cols) - mat = np.zeros(op.shape, dtype) + mat = np.zeros(shape, dtype) + + try: + matvec_method = op.matvec + except AttributeError: + matvec_method = op.__call__ for i in range(cols): - unit_vec = np.zeros(rows, dtype=dtype) + unit_vec = np.zeros(cols, dtype=dtype) unit_vec[i] = 1 - mat[:, i] = op.matvec(unit_vec) + mat[:, i] = matvec_method(unit_vec) pb.progress() pb.finished() diff --git a/test/test_fmm.py b/test/test_fmm.py index fb23350878444fe4dea617867f041c09bf85a5a1..e3bb117bec2e7b539a1701addf53b08a921fde3e 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -66,17 +66,30 @@ def no_test_sumpy_fmm(ctx_getter): tree, _ = tb(queue, sources, targets=targets, max_particles_in_box=30, debug=True) + if 0: + from boxtree.visualization import TreePlotter + plotter = TreePlotter(tree.get()) + plotter.draw_tree(fill=False, edgecolor="black", zorder=10) + plotter.set_bounding_box() + plotter.draw_box_numbers() + + import matplotlib.pyplot as pt + pt.show() + from boxtree.traversal import FMMTraversalBuilder tbuild = FMMTraversalBuilder(ctx) trav, _ = tbuild(queue, tree, debug=True) trav = trav.get() - from pyopencl.clrandom import RanluxGenerator - rng = RanluxGenerator(queue, seed=20) - - weights = rng.uniform(queue, nsources, dtype=np.float64) - #weights = np.ones(nsources) + if 1: + from pyopencl.clrandom import RanluxGenerator + rng = RanluxGenerator(queue, seed=20) + weights = rng.uniform(queue, nsources, dtype=np.float64) + else: + weights = np.zeros(nsources) + weights[0] = 1 + weights = cl.array.to_device(queue, weights) logger.info("computing direct (reference) result") @@ -84,7 +97,7 @@ def no_test_sumpy_fmm(ctx_getter): from sumpy.expansion.local import VolumeTaylorLocalExpansion from sumpy.kernel import LaplaceKernel - order = 2 + order = 1 knl = LaplaceKernel(dim) mpole_expn = VolumeTaylorMultipoleExpansion(knl, order) local_expn = VolumeTaylorLocalExpansion(knl, order) @@ -97,6 +110,25 @@ def no_test_sumpy_fmm(ctx_getter): from boxtree.fmm import drive_fmm pot, = drive_fmm(trav, wrangler, weights) + #print la.norm(pot.get()) + #1/0 + + if 0: + from sumpy.tools import build_matrix + + def matvec(x): + pot, = drive_fmm(trav, wrangler, cl.array.to_device(queue, x)) + return pot.get() + + mat = build_matrix(matvec, dtype=np.float64, shape=(ntargets, nsources)) + if 0: + amat = np.abs(mat) + sum_over_rows = np.sum(amat, axis=0) + print np.where(sum_over_rows == 0) + else: + import matplotlib.pyplot as pt + pt.imshow(mat) + pt.show() from sumpy import P2P p2p = P2P(ctx, out_kernels, exclude_self=False)