diff --git a/boxtree/fmm.py b/boxtree/fmm.py index afaa93d69ceb2e1d56a9b7a493eb44004bb65bd2..818de7a68aa20cd3526bda5dcb1667f89f0725cf 100644 --- a/boxtree/fmm.py +++ b/boxtree/fmm.py @@ -25,7 +25,16 @@ THE SOFTWARE. import logging logger = logging.getLogger(__name__) -from pytools import ProcessLogger, Record + +try: + # Python 3 + from collections.abc import Mapping +except ImportError: + # Python 2 + from collections import Mapping + + +from pytools import ProcessLogger def drive_fmm(traversal, expansion_wrangler, src_weights, timing_data=None): @@ -357,16 +366,42 @@ class ExpansionWranglerInterface: # {{{ timing result -class TimingResult(Record): - """ - .. attribute:: wall_elapsed - .. attribute:: process_elapsed +class TimingResult(Mapping): + """Interface for returned timing data. + + This supports accessing timing results via a mapping interface, along with + combining results via :meth:`merge`. + + .. automethod:: merge """ - def __init__(self, wall_elapsed, process_elapsed): - Record.__init__(self, - wall_elapsed=wall_elapsed, - process_elapsed=process_elapsed) + def __init__(self, *args, **kwargs): + """See constructor for :class:`dict`.""" + self._mapping = dict(*args, **kwargs) + + def __getitem__(self, key): + return self._mapping[key] + + def __iter__(self): + return iter(self._mapping) + + def __len__(self): + return len(self._mapping) + + def merge(self, other): + """Merge this result with another by adding together common fields.""" + result = {} + + for key in self: + val = self.get(key) + other_val = other.get(key) + + if val is None or other_val is None: + continue + + result[key] = val + other_val + + return type(self)(result) # }}} @@ -402,17 +437,6 @@ class TimingRecorder(object): def add(self, description, future): self.futures[description].append(future) - def merge(self, result1, result2): - wall_elapsed = None - process_elapsed = None - - if None not in (result1.wall_elapsed, result2.wall_elapsed): - wall_elapsed = result1.wall_elapsed + 
 result2.wall_elapsed - if None not in (result1.process_elapsed, result2.process_elapsed): - process_elapsed = result1.process_elapsed + result2.process_elapsed - - return TimingResult(wall_elapsed, process_elapsed) - def summarize(self): result = {} @@ -421,7 +445,7 @@ class TimingRecorder(object): timing_result = next(futures).result() for future in futures: - timing_result = self.merge(timing_result, future.result()) + timing_result = timing_result.merge(future.result()) result[description] = timing_result diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index c076e56e2f47470bbb70c3acc1fcf0c7da8d7142..33e9dec7d087cecb0610a35e9ef08a6daebc631d 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -41,6 +41,10 @@ __doc__ = """Integrates :mod:`boxtree` with class FMMLibExpansionWrangler(object): """Implements the :class:`boxtree.fmm.ExpansionWranglerInterface` by using pyfmmlib. + + Timing results returned by this wrangler contain the values *wall_elapsed* + and (optionally, if supported) *process_elapsed*, which measure wall time + and process time in seconds, respectively. 
""" # {{{ constructor diff --git a/boxtree/tools.py b/boxtree/tools.py index 97cd43d6db37a506d6551e91b77d72f86e1f09f7..4d6fe43e859944d50768b3bccceee7309f1fba41 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -517,14 +517,18 @@ class DummyTimingFuture(TimingFuture): @classmethod def from_timer(cls, timer): - return cls(timer.wall_elapsed, timer.process_elapsed) + return cls(wall_elapsed=timer.wall_elapsed, + process_elapsed=timer.process_elapsed) - def __init__(self, wall_elapsed, process_elapsed): - self.wall_elapsed = wall_elapsed - self.process_elapsed = process_elapsed + @classmethod + def from_op_count(cls, op_count): + return cls(ops_elapsed=op_count) + + def __init__(self, *args, **kwargs): + self._result = TimingResult(*args, **kwargs) def result(self): - return TimingResult(self.wall_elapsed, self.process_elapsed) + return self._result def done(self): return True @@ -598,4 +602,198 @@ class InlineBinarySearch(object): # }}} + +# {{{ constant one wrangler + +class ConstantOneExpansionWrangler(object): + """This implements the 'analytical routines' for a Green's function that is + constant 1 everywhere. For 'charges' of 'ones', this should get every particle + a copy of the particle count. + + Timing results returned by this wrangler contain the field *ops_elapsed*, + which counts approximately the number of floating-point operations required. 
+ """ + + def __init__(self, tree): + self.tree = tree + + def multipole_expansion_zeros(self): + return np.zeros(self.tree.nboxes, dtype=np.float64) + + local_expansion_zeros = multipole_expansion_zeros + + def output_zeros(self): + return np.zeros(self.tree.ntargets, dtype=np.float64) + + def _get_source_slice(self, ibox): + pstart = self.tree.box_source_starts[ibox] + return slice( + pstart, pstart + self.tree.box_source_counts_nonchild[ibox]) + + def _get_target_slice(self, ibox): + pstart = self.tree.box_target_starts[ibox] + return slice( + pstart, pstart + self.tree.box_target_counts_nonchild[ibox]) + + def reorder_sources(self, source_array): + return source_array[self.tree.user_source_ids] + + def reorder_potentials(self, potentials): + return potentials[self.tree.sorted_target_ids] + + @staticmethod + def timing_future(ops): + return DummyTimingFuture.from_op_count(ops) + + def form_multipoles(self, level_start_source_box_nrs, source_boxes, src_weights): + mpoles = self.multipole_expansion_zeros() + ops = 0 + + for ibox in source_boxes: + pslice = self._get_source_slice(ibox) + mpoles[ibox] += np.sum(src_weights[pslice]) + ops += src_weights[pslice].size + + return mpoles, self.timing_future(ops) + + def coarsen_multipoles(self, level_start_source_parent_box_nrs, + source_parent_boxes, mpoles): + tree = self.tree + ops = 0 + + # nlevels-1 is the last valid level index + # nlevels-2 is the last valid level that could have children + # + # 3 is the last relevant source_level. + # 2 is the last relevant target_level. 
+ # (because no level 1 box will be well-separated from another) + for source_level in range(tree.nlevels-1, 2, -1): + target_level = source_level - 1 + start, stop = level_start_source_parent_box_nrs[ + target_level:target_level+2] + for ibox in source_parent_boxes[start:stop]: + for child in tree.box_child_ids[:, ibox]: + if child: + mpoles[ibox] += mpoles[child] + ops += 1 + + return mpoles, self.timing_future(ops) + + def eval_direct(self, target_boxes, neighbor_sources_starts, + neighbor_sources_lists, src_weights): + pot = self.output_zeros() + ops = 0 + + for itgt_box, tgt_ibox in enumerate(target_boxes): + tgt_pslice = self._get_target_slice(tgt_ibox) + + src_sum = 0 + nsrcs = 0 + start, end = neighbor_sources_starts[itgt_box:itgt_box+2] + #print "DIR: %s <- %s" % (tgt_ibox, neighbor_sources_lists[start:end]) + for src_ibox in neighbor_sources_lists[start:end]: + src_pslice = self._get_source_slice(src_ibox) + nsrcs += src_weights[src_pslice].size + + src_sum += np.sum(src_weights[src_pslice]) + + pot[tgt_pslice] = src_sum + ops += pot[tgt_pslice].size * nsrcs + + return pot, self.timing_future(ops) + + def multipole_to_local(self, + level_start_target_or_target_parent_box_nrs, + target_or_target_parent_boxes, + starts, lists, mpole_exps): + local_exps = self.local_expansion_zeros() + ops = 0 + + for itgt_box, tgt_ibox in enumerate(target_or_target_parent_boxes): + start, end = starts[itgt_box:itgt_box+2] + + contrib = 0 + #print tgt_ibox, "<-", lists[start:end] + for src_ibox in lists[start:end]: + contrib += mpole_exps[src_ibox] + ops += 1 + + local_exps[tgt_ibox] += contrib + + return local_exps, self.timing_future(ops) + + def eval_multipoles(self, + target_boxes_by_source_level, from_sep_smaller_nonsiblings_by_level, + mpole_exps): + pot = self.output_zeros() + ops = 0 + + for level, ssn in enumerate(from_sep_smaller_nonsiblings_by_level): + for itgt_box, tgt_ibox in \ + enumerate(target_boxes_by_source_level[level]): + tgt_pslice = 
self._get_target_slice(tgt_ibox) + + contrib = 0 + + start, end = ssn.starts[itgt_box:itgt_box+2] + for src_ibox in ssn.lists[start:end]: + contrib += mpole_exps[src_ibox] + + pot[tgt_pslice] += contrib + ops += pot[tgt_pslice].size * (end - start) + + return pot, self.timing_future(ops) + + def form_locals(self, + level_start_target_or_target_parent_box_nrs, + target_or_target_parent_boxes, starts, lists, src_weights): + local_exps = self.local_expansion_zeros() + ops = 0 + + for itgt_box, tgt_ibox in enumerate(target_or_target_parent_boxes): + start, end = starts[itgt_box:itgt_box+2] + + #print "LIST 4", tgt_ibox, "<-", lists[start:end] + contrib = 0 + nsrcs = 0 + for src_ibox in lists[start:end]: + src_pslice = self._get_source_slice(src_ibox) + nsrcs += src_weights[src_pslice].size + + contrib += np.sum(src_weights[src_pslice]) + + local_exps[tgt_ibox] += contrib + ops += nsrcs + + return local_exps, self.timing_future(ops) + + def refine_locals(self, level_start_target_or_target_parent_box_nrs, + target_or_target_parent_boxes, local_exps): + ops = 0 + + for target_lev in range(self.tree.nlevels): + start, stop = level_start_target_or_target_parent_box_nrs[ + target_lev:target_lev+2] + for ibox in target_or_target_parent_boxes[start:stop]: + local_exps[ibox] += local_exps[self.tree.box_parent_ids[ibox]] + ops += 1 + + return local_exps, self.timing_future(ops) + + def eval_locals(self, level_start_target_box_nrs, target_boxes, local_exps): + pot = self.output_zeros() + ops = 0 + + for ibox in target_boxes: + tgt_pslice = self._get_target_slice(ibox) + pot[tgt_pslice] += local_exps[ibox] + ops += pot[tgt_pslice].size + + return pot, self.timing_future(ops) + + def finalize_potentials(self, potentials): + return potentials + +# }}} + # vim: foldmethod=marker:filetype=pyopencl diff --git a/test/test_fmm.py b/test/test_fmm.py index cb0b061fc46a46aeefb47dc48460e9f13fb6c49b..80417212e79b0d9aa12e8fd6d1caed8cba954b47 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py 
@@ -36,7 +36,8 @@ from boxtree.tools import ( # noqa: F401 make_normal_particle_array as p_normal, make_surface_particle_array as p_surface, make_uniform_particle_array as p_uniform, - particle_array_to_host) + particle_array_to_host, + ConstantOneExpansionWrangler) import logging logger = logging.getLogger(__name__) @@ -44,186 +45,13 @@ logger = logging.getLogger(__name__) # {{{ fmm interaction completeness test -from boxtree.tools import return_timing_data - - -class ConstantOneExpansionWrangler(object): - """This implements the 'analytical routines' for a Green's function that is - constant 1 everywhere. For 'charges' of 'ones', this should get every particle - a copy of the particle count. - """ - - def __init__(self, tree): - self.tree = tree - - def multipole_expansion_zeros(self): - return np.zeros(self.tree.nboxes, dtype=np.float64) - - local_expansion_zeros = multipole_expansion_zeros - - def potential_zeros(self): - return np.zeros(self.tree.ntargets, dtype=np.float64) - - def _get_source_slice(self, ibox): - pstart = self.tree.box_source_starts[ibox] - return slice( - pstart, pstart + self.tree.box_source_counts_nonchild[ibox]) - - def _get_target_slice(self, ibox): - pstart = self.tree.box_target_starts[ibox] - return slice( - pstart, pstart + self.tree.box_target_counts_nonchild[ibox]) - - def reorder_sources(self, source_array): - return source_array[self.tree.user_source_ids] - - def reorder_potentials(self, potentials): - return potentials[self.tree.sorted_target_ids] - - @return_timing_data - def form_multipoles(self, level_start_source_box_nrs, source_boxes, src_weights): - mpoles = self.multipole_expansion_zeros() - for ibox in source_boxes: - pslice = self._get_source_slice(ibox) - mpoles[ibox] += np.sum(src_weights[pslice]) - - return mpoles - - @return_timing_data - def coarsen_multipoles(self, level_start_source_parent_box_nrs, - source_parent_boxes, mpoles): - tree = self.tree - - # nlevels-1 is the last valid level index - # nlevels-2 is 
the last valid level that could have children - # - # 3 is the last relevant source_level. - # 2 is the last relevant target_level. - # (because no level 1 box will be well-separated from another) - for source_level in range(tree.nlevels-1, 2, -1): - target_level = source_level - 1 - start, stop = level_start_source_parent_box_nrs[ - target_level:target_level+2] - for ibox in source_parent_boxes[start:stop]: - for child in tree.box_child_ids[:, ibox]: - if child: - mpoles[ibox] += mpoles[child] - - return mpoles - - @return_timing_data - def eval_direct(self, target_boxes, neighbor_sources_starts, - neighbor_sources_lists, src_weights): - pot = self.potential_zeros() - - for itgt_box, tgt_ibox in enumerate(target_boxes): - tgt_pslice = self._get_target_slice(tgt_ibox) - - src_sum = 0 - start, end = neighbor_sources_starts[itgt_box:itgt_box+2] - #print "DIR: %s <- %s" % (tgt_ibox, neighbor_sources_lists[start:end]) - for src_ibox in neighbor_sources_lists[start:end]: - src_pslice = self._get_source_slice(src_ibox) - - src_sum += np.sum(src_weights[src_pslice]) - - pot[tgt_pslice] = src_sum - - return pot - - @return_timing_data - def multipole_to_local(self, - level_start_target_or_target_parent_box_nrs, - target_or_target_parent_boxes, - starts, lists, mpole_exps): - local_exps = self.local_expansion_zeros() - - for itgt_box, tgt_ibox in enumerate(target_or_target_parent_boxes): - start, end = starts[itgt_box:itgt_box+2] - - contrib = 0 - #print tgt_ibox, "<-", lists[start:end] - for src_ibox in lists[start:end]: - contrib += mpole_exps[src_ibox] - - local_exps[tgt_ibox] += contrib - - return local_exps - - @return_timing_data - def eval_multipoles(self, - target_boxes_by_source_level, from_sep_smaller_nonsiblings_by_level, - mpole_exps): - pot = self.potential_zeros() - - for level, ssn in enumerate(from_sep_smaller_nonsiblings_by_level): - for itgt_box, tgt_ibox in \ - enumerate(target_boxes_by_source_level[level]): - tgt_pslice = self._get_target_slice(tgt_ibox) 
- - contrib = 0 - - start, end = ssn.starts[itgt_box:itgt_box+2] - for src_ibox in ssn.lists[start:end]: - contrib += mpole_exps[src_ibox] - - pot[tgt_pslice] += contrib - - return pot - - @return_timing_data - def form_locals(self, - level_start_target_or_target_parent_box_nrs, - target_or_target_parent_boxes, starts, lists, src_weights): - local_exps = self.local_expansion_zeros() - - for itgt_box, tgt_ibox in enumerate(target_or_target_parent_boxes): - start, end = starts[itgt_box:itgt_box+2] - - #print "LIST 4", tgt_ibox, "<-", lists[start:end] - contrib = 0 - for src_ibox in lists[start:end]: - src_pslice = self._get_source_slice(src_ibox) - - contrib += np.sum(src_weights[src_pslice]) - - local_exps[tgt_ibox] += contrib - - return local_exps - - @return_timing_data - def refine_locals(self, level_start_target_or_target_parent_box_nrs, - target_or_target_parent_boxes, local_exps): - - for target_lev in range(1, self.tree.nlevels): - start, stop = level_start_target_or_target_parent_box_nrs[ - target_lev:target_lev+2] - for ibox in target_or_target_parent_boxes[start:stop]: - local_exps[ibox] += local_exps[self.tree.box_parent_ids[ibox]] - - return local_exps - - @return_timing_data - def eval_locals(self, level_start_target_box_nrs, target_boxes, local_exps): - pot = self.potential_zeros() - - for ibox in target_boxes: - tgt_pslice = self._get_target_slice(ibox) - pot[tgt_pslice] += local_exps[ibox] - - return pot - - def finalize_potentials(self, potentials): - return potentials - - class ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder( ConstantOneExpansionWrangler): def __init__(self, tree, filtered_targets): ConstantOneExpansionWrangler.__init__(self, tree) self.filtered_targets = filtered_targets - def potential_zeros(self): + def output_zeros(self): return np.zeros(self.filtered_targets.nfiltered_targets, dtype=np.float64) def _get_target_slice(self, ibox):