diff --git a/boxtree/dfmm.py b/boxtree/dfmm.py new file mode 100644 index 0000000000000000000000000000000000000000..ebd8350e4d36546368b04cea661669326b592cd2 --- /dev/null +++ b/boxtree/dfmm.py @@ -0,0 +1,117 @@ +from __future__ import division + +__copyright__ = "Copyright (C) 2012 Andreas Kloeckner \ + Copyright (C) 2017 Hao Gao" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import logging +logger = logging.getLogger(__name__) + +from mpi4py import MPI +import numpy as np + +def drive_dfmm(traversal, expansion_wrangler, src_weights): + + # {{{ Get MPI information + + comm = MPI.COMM_WORLD + current_rank = comm.Get_rank() + total_rank = comm.Get_size() + + # }}} + + # {{{ Distribute tree parameters + + if current_rank == 0: + tree = traversal.tree + # TODO: distribute more parameters of the tree + parameters = {"sources_are_targets": tree.sources_are_targets, + "sources_have_extent": tree.sources_have_extent, + "nsources":tree.nsources, + "nboxes":tree.box_source_starts.shape[0], + "dimensions":tree.sources.shape[0], + "coord_dtype":tree.coord_dtype, + "box_id_dtype":tree.box_id_dtype} + else: + parameters = None + parameters = comm.bcast(parameters, root=0) + + # }}} + + # {{{ Fill tree parameters to the locally essentail tree + + from boxtree import Tree + letree = Tree() + # TODO: add more parameters to the locally essential tree + letree.sources_are_targets = parameters["sources_are_targets"] + + # }}} + + # {{{ Construct locally essential tree mask for each rank + + # Problem: Current implementation divides all boxes evenly across all + # ranks. This scheme is subject to significant load imbalance. A better way to do + # this is to assign a weight to each box according to its interaction list, and then + # divides boxes evenly by the total weights. + + if current_rank == 0: + # mask[i][j] is true iff box j is in the locally essential tree of rank i + mask = np.zeros((total_rank, parameters["nboxes"]), dtype=bool) + num_boxes_per_rank = (parameters["nboxes"] + total_rank - 1) // total_rank + + for i in range(total_rank): + # Get the start and end box index for rank i + box_start_idx = num_boxes_per_rank * i + if current_rank == total_rank - 1: + box_end_idx = parameters["nboxes"] + else: + box_end_idx = num_boxes_per_rank * (i + 1) + + # Mark all ancestors of boxes of rank i + new_mask = np.zeros(parameters["nboxes"], dtype=bool) + new_mask[box_start_idx:box_end_idx] = True + while np.count_nonzero(new_mask) != 0: + np.logical_or(mask[i, :], new_mask, out=mask[i, :]) + new_mask_idx = np.nonzero(new_mask) + new_mask_parent_idx = tree.box_parent_ids[new_mask_idx] + new_mask[:] = False + new_mask[new_mask_parent_idx] = True + new_mask = np.logical_and(new_mask, np.logical_not(mask[i, :]), + out=new_mask) + + # Generate interaction list mask for mask[i, :] + interaction_mask = np.zeros(parameters["nboxes"], dtype=bool) + box_indices = np.nonzero(mask[i, :]) + for j in range(len(box_indices)): + box_index = box_indices[j] + # List 1 + start, end = traversal.neighbor_source_boxes_starts[box_index:box_index + 2] + list1_idx = traversal.neighbor_source_boxes_lists[start:end] + interaction_mask[list1_idx] = True + # List 2 + start, end = traversal.from_sep_siblings_starts[box_index:box_index + 2] + list2_idx = traversal.from_sep_siblings_lists[start:end] + interaction_mask[list2_idx] = True + # List 3 + + + # }}} diff --git a/test/test_dfmm.py b/test/test_dfmm.py new file mode 100644 index 0000000000000000000000000000000000000000..2a6c72422cab87384a80b74af2dc38e42371a560 --- /dev/null +++ b/test/test_dfmm.py @@ -0,0 +1,80 @@ +import numpy as np +import sys +from mpi4py import MPI + +# Parameters +dims = 2 +nsources = 30 +ntargets = 10 +dtype = np.float64 + +# Get the current rank +comm = MPI.COMM_WORLD +rank = comm.Get_rank() + +# Initialization +trav = None +sources_weights = None +wrangler = None + +# Generate particles and run shared-memory parallelism on rank 0 +if rank == 0: + # Configure PyOpenCL + import pyopencl as cl + ctx = cl.create_some_context() + queue = cl.CommandQueue(ctx) + + # Generate random particles and source weights + from boxtree.tools import make_normal_particle_array as p_normal + sources = p_normal(queue, nsources, dims, dtype, seed=15) + targets = p_normal(queue, ntargets, dims, dtype, seed=18) + np.array([2, 0, 0])[:dims] + + from boxtree.tools import particle_array_to_host + sources_host = particle_array_to_host(sources) + targets_host = particle_array_to_host(targets) + + from pyopencl.clrandom import PhiloxGenerator + rng = PhiloxGenerator(queue.context, seed=20) + sources_weights = rng.uniform(queue, nsources, dtype=np.float64).get() + + # Display sources and targets + if "--display" in sys.argv: + import matplotlib.pyplot as plt + plt.plot(sources_host[:, 0], sources_host[:, 1], "bo") + plt.plot(targets_host[:, 0], targets_host[:, 1], "ro") + plt.show() + + # Calculate potentials using direct evaluation + import numpy.linalg as la + distances = la.norm(sources_host.reshape(1, nsources, 2) - \ + targets_host.reshape(ntargets, 1, 2), + ord=2, axis=2) + pot_naive = np.sum(-np.log(distances)*sources_weights, axis=1) + + # Build the tree and interaction lists + from boxtree import TreeBuilder + tb = TreeBuilder(ctx) + tree, _ = tb(queue, sources, targets=targets, max_particles_in_box=30, debug=True) + + from boxtree.traversal import FMMTraversalBuilder + tg = FMMTraversalBuilder(ctx) + trav, _ = tg(queue, tree, debug=True) + trav = trav.get(queue=queue) + + # Get pyfmmlib expansion wrangler + from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler + def fmm_level_to_nterms(tree, level): + return 20 + wrangler = FMMLibExpansionWrangler(trav.tree, 0, fmm_level_to_nterms=fmm_level_to_nterms) + + # Compute FMM using shared memory parallelism + from boxtree.fmm import drive_fmm + pot_fmm = drive_fmm(trav, wrangler, sources_weights)* 2 * np.pi + print(la.norm(pot_fmm - pot_naive, ord=2)) + +# Compute FMM using distributed memory parallelism +from boxtree.dfmm import drive_dfmm +# Note: The drive_dfmm interface works as follows: +# Rank 0 passes the correct trav, wrangler, and sources_weights +# All other ranks pass None to these arguments +pot_dfmm = drive_dfmm(trav, wrangler, sources_weights)