diff --git a/boxtree/distributed/perf_model.py b/boxtree/distributed/perf_model.py
index 32103030fac2c104078783ac0e85189839f2c3d3..34eb45ca30407b4479dc6a3f67a70af958769b77 100644
--- a/boxtree/distributed/perf_model.py
+++ b/boxtree/distributed/perf_model.py
@@ -476,9 +476,9 @@ class PerformanceModel:
             result = self.time_result[0]
 
             if wall_time:
-                dependent_value = result[y_name].wall_elapsed
+                dependent_value = result[y_name]["wall_elapsed"]
             else:
-                dependent_value = result[y_name].process_elapsed
+                dependent_value = result[y_name]["process_elapsed"]
 
             independent_value = result[x_name[0]]
             coeff = dependent_value / independent_value
@@ -490,9 +490,9 @@ class PerformanceModel:
 
         for iresult, result in enumerate(self.time_result):
             if wall_time:
-                dependent_value[iresult] = result[y_name].wall_elapsed
+                dependent_value[iresult] = result[y_name]["wall_elapsed"]
             else:
-                dependent_value[iresult] = result[y_name].process_elapsed
+                dependent_value[iresult] = result[y_name]["process_elapsed"]
 
             for icol, variable_name in enumerate(x_name):
                 coeff_matrix[iresult, icol] = result[variable_name]
@@ -688,8 +688,8 @@ class PerformanceModel:
 
             elif isinstance(entry, dict):
                 converted_result[field_name] = TimingResult(
-                        entry['wall_elapsed'],
-                        entry['process_elapsed']
+                        wall_elapsed=entry['wall_elapsed'],
+                        process_elapsed=entry['process_elapsed']
                 )
 
             else:
@@ -718,8 +718,8 @@ class PerformanceModel:
 
             elif isinstance(entry, TimingResult):
                 current_output[field_name] = {
-                    'wall_elapsed': entry.wall_elapsed,
-                    'process_elapsed': entry.process_elapsed
+                    'wall_elapsed': entry.get("wall_elapsed"),
+                    'process_elapsed': entry.get("process_elapsed")
                 }
 
         output.append(current_output)
diff --git a/boxtree/fmm.py b/boxtree/fmm.py
index 6e9a6a316651e44e311d23fb93e22e10a90ac662..06ce971fb90b9e8233acd863c967d58399bd2c88 100644
--- a/boxtree/fmm.py
+++ b/boxtree/fmm.py
@@ -25,7 +25,16 @@ THE SOFTWARE.
 import logging
 logger = logging.getLogger(__name__)
 
-from pytools import ProcessLogger, Record
+
+try:
+    # Python 3
+    from collections.abc import Mapping
+except ImportError:
+    # Python 2
+    from collections import Mapping
+
+
+from pytools import ProcessLogger
 
 
 def drive_fmm(traversal, expansion_wrangler, src_weights, timing_data=None):
@@ -357,22 +366,44 @@ class ExpansionWranglerInterface:
 
 # {{{ timing result
 
-class TimingResult(Record):
-    """
-    .. attribute:: wall_elapsed
-    .. attribute:: process_elapsed
+class TimingResult(Mapping):
+    """Interface for returned timing data.
+
+    This supports accessing timing results via a mapping interface, along with
+    combining results via :meth:`merge`.
+
+    .. automethod:: merge
+    """
 
-    def __init__(self, wall_elapsed, process_elapsed):
-        Record.__init__(self,
-                wall_elapsed=wall_elapsed,
-                process_elapsed=process_elapsed)
+    def __init__(self, *args, **kwargs):
+        """See constructor for :class:`dict`."""
+        self._mapping = dict(*args, **kwargs)
+
+    def __getitem__(self, key):
+        return self._mapping[key]
+
+    def __iter__(self):
+        return iter(self._mapping)
+
+    def __len__(self):
+        return len(self._mapping)
 
-    def __add__(self, other):
-        return TimingResult(
-                self.wall_elapsed + other.wall_elapsed,
-                self.process_elapsed + other.process_elapsed
-                )
+    def merge(self, other):
+        """Merge this result with another by adding together common fields."""
+        result = {}
+
+        for key in self:
+            val = self.get(key)
+            other_val = other.get(key)
+
+            if val is None or other_val is None:
+                continue
+
+            result[key] = val + other_val
+
+        return type(self)(result)
+
+    __add__ = merge
 
 
 # }}}
@@ -408,17 +439,6 @@ class TimingRecorder(object):
     def add(self, description, future):
        self.futures[description].append(future)
 
-    def merge(self, result1, result2):
-        wall_elapsed = None
-        process_elapsed = None
-
-        if None not in (result1.wall_elapsed, result2.wall_elapsed):
-            wall_elapsed = result1.wall_elapsed + result2.wall_elapsed
-        if None not in (result1.process_elapsed, result2.process_elapsed):
-            process_elapsed = result1.process_elapsed + result2.process_elapsed
-
-        return TimingResult(wall_elapsed, process_elapsed)
-
     def summarize(self):
         result = {}
 
@@ -427,7 +447,7 @@ class TimingRecorder(object):
             timing_result = next(futures).result()
 
             for future in futures:
-                timing_result = self.merge(timing_result, future.result())
+                timing_result = timing_result.merge(future.result())
 
             result[description] = timing_result
 
diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py
index c076e56e2f47470bbb70c3acc1fcf0c7da8d7142..33e9dec7d087cecb0610a35e9ef08a6daebc631d 100644
--- a/boxtree/pyfmmlib_integration.py
+++ b/boxtree/pyfmmlib_integration.py
@@ -41,6 +41,10 @@ __doc__ = """Integrates :mod:`boxtree` with
 class FMMLibExpansionWrangler(object):
     """Implements the :class:`boxtree.fmm.ExpansionWranglerInterface`
     by using pyfmmlib.
+
+    Timing results returned by this wrangler contain the values *wall_elapsed*
+    and (optionally, if supported) *process_elapsed*, which measure wall time
+    and process time in seconds, respectively.
     """
 
     # {{{ constructor
diff --git a/boxtree/tools.py b/boxtree/tools.py
index f390d0904c3129dc8e4ff451ea020110b10e0712..a2fced57a503b5c5618fadbefa0cf710dec89908 100644
--- a/boxtree/tools.py
+++ b/boxtree/tools.py
@@ -517,14 +517,18 @@ class DummyTimingFuture(TimingFuture):
 
     @classmethod
     def from_timer(cls, timer):
-        return cls(timer.wall_elapsed, timer.process_elapsed)
+        return cls(wall_elapsed=timer.wall_elapsed,
+                process_elapsed=timer.process_elapsed)
 
-    def __init__(self, wall_elapsed, process_elapsed):
-        self.wall_elapsed = wall_elapsed
-        self.process_elapsed = process_elapsed
+    @classmethod
+    def from_op_count(cls, op_count):
+        return cls(ops_elapsed=op_count)
+
+    def __init__(self, *args, **kwargs):
+        self._result = TimingResult(*args, **kwargs)
 
     def result(self):
-        return TimingResult(self.wall_elapsed, self.process_elapsed)
+        return self._result
 
     def done(self):
         return True
@@ -787,4 +791,198 @@ class AllReduceCommPattern(object):
 
 # }}}
 
+
+# {{{ constant one wrangler
+
+class ConstantOneExpansionWrangler(object):
+    """This implements the 'analytical routines' for a Green's function that is
+    constant 1 everywhere. For 'charges' of 'ones', this should get every particle
+    a copy of the particle count.
+
+    Timing results returned by this wrangler contain the field *ops_elapsed*,
+    which counts approximately the number of floating-point operations required.
+    """
+
+    def __init__(self, tree):
+        self.tree = tree
+
+    def multipole_expansion_zeros(self):
+        return np.zeros(self.tree.nboxes, dtype=np.float64)
+
+    local_expansion_zeros = multipole_expansion_zeros
+
+    def output_zeros(self):
+        return np.zeros(self.tree.ntargets, dtype=np.float64)
+
+    def _get_source_slice(self, ibox):
+        pstart = self.tree.box_source_starts[ibox]
+        return slice(
+                pstart, pstart + self.tree.box_source_counts_nonchild[ibox])
+
+    def _get_target_slice(self, ibox):
+        pstart = self.tree.box_target_starts[ibox]
+        return slice(
+                pstart, pstart + self.tree.box_target_counts_nonchild[ibox])
+
+    def reorder_sources(self, source_array):
+        return source_array[self.tree.user_source_ids]
+
+    def reorder_potentials(self, potentials):
+        return potentials[self.tree.sorted_target_ids]
+
+    @staticmethod
+    def timing_future(ops):
+        return DummyTimingFuture.from_op_count(ops)
+
+    def form_multipoles(self, level_start_source_box_nrs, source_boxes, src_weights):
+        mpoles = self.multipole_expansion_zeros()
+        ops = 0
+
+        for ibox in source_boxes:
+            pslice = self._get_source_slice(ibox)
+            mpoles[ibox] += np.sum(src_weights[pslice])
+            ops += (pslice.stop - pslice.start)
+
+        return mpoles, self.timing_future(ops)
+
+    def coarsen_multipoles(self, level_start_source_parent_box_nrs,
+            source_parent_boxes, mpoles):
+        tree = self.tree
+        ops = 0
+
+        # nlevels-1 is the last valid level index
+        # nlevels-2 is the last valid level that could have children
+        #
+        # 3 is the last relevant source_level.
+        # 2 is the last relevant target_level.
+        # (because no level 1 box will be well-separated from another)
+        for source_level in range(tree.nlevels-1, 2, -1):
+            target_level = source_level - 1
+            start, stop = level_start_source_parent_box_nrs[
+                    target_level:target_level+2]
+            for ibox in source_parent_boxes[start:stop]:
+                for child in tree.box_child_ids[:, ibox]:
+                    if child:
+                        mpoles[ibox] += mpoles[child]
+                        ops += 1
+
+        return mpoles, self.timing_future(ops)
+
+    def eval_direct(self, target_boxes, neighbor_sources_starts,
+            neighbor_sources_lists, src_weights):
+        pot = self.output_zeros()
+        ops = 0
+
+        for itgt_box, tgt_ibox in enumerate(target_boxes):
+            tgt_pslice = self._get_target_slice(tgt_ibox)
+
+            src_sum = 0
+            nsrcs = 0
+            start, end = neighbor_sources_starts[itgt_box:itgt_box+2]
+            #print "DIR: %s <- %s" % (tgt_ibox, neighbor_sources_lists[start:end])
+            for src_ibox in neighbor_sources_lists[start:end]:
+                src_pslice = self._get_source_slice(src_ibox)
+                nsrcs += src_weights[src_pslice].size
+
+                src_sum += np.sum(src_weights[src_pslice])
+
+            pot[tgt_pslice] = src_sum
+            ops += (tgt_pslice.stop - tgt_pslice.start) * nsrcs
+
+        return pot, self.timing_future(ops)
+
+    def multipole_to_local(self,
+            level_start_target_or_target_parent_box_nrs,
+            target_or_target_parent_boxes,
+            starts, lists, mpole_exps):
+        local_exps = self.local_expansion_zeros()
+        ops = 0
+
+        for itgt_box, tgt_ibox in enumerate(target_or_target_parent_boxes):
+            start, end = starts[itgt_box:itgt_box+2]
+
+            contrib = 0
+            #print tgt_ibox, "<-", lists[start:end]
+            for src_ibox in lists[start:end]:
+                contrib += mpole_exps[src_ibox]
+                ops += 1
+
+            local_exps[tgt_ibox] += contrib
+
+        return local_exps, self.timing_future(ops)
+
+    def eval_multipoles(self,
+            target_boxes_by_source_level, from_sep_smaller_nonsiblings_by_level,
+            mpole_exps):
+        pot = self.output_zeros()
+        ops = 0
+
+        for level, ssn in enumerate(from_sep_smaller_nonsiblings_by_level):
+            for itgt_box, tgt_ibox in \
+                    enumerate(target_boxes_by_source_level[level]):
+                tgt_pslice = self._get_target_slice(tgt_ibox)
+
+                contrib = 0
+
+                start, end = ssn.starts[itgt_box:itgt_box+2]
+                for src_ibox in ssn.lists[start:end]:
+                    contrib += mpole_exps[src_ibox]
+
+                pot[tgt_pslice] += contrib
+                ops += (tgt_pslice.stop - tgt_pslice.start) * (end - start)
+
+        return pot, self.timing_future(ops)
+
+    def form_locals(self,
+            level_start_target_or_target_parent_box_nrs,
+            target_or_target_parent_boxes, starts, lists, src_weights):
+        local_exps = self.local_expansion_zeros()
+        ops = 0
+
+        for itgt_box, tgt_ibox in enumerate(target_or_target_parent_boxes):
+            start, end = starts[itgt_box:itgt_box+2]
+
+            #print "LIST 4", tgt_ibox, "<-", lists[start:end]
+            contrib = 0
+            nsrcs = 0
+            for src_ibox in lists[start:end]:
+                src_pslice = self._get_source_slice(src_ibox)
+                nsrcs += src_pslice.stop - src_pslice.start
+
+                contrib += np.sum(src_weights[src_pslice])
+
+            local_exps[tgt_ibox] += contrib
+            ops += nsrcs
+
+        return local_exps, self.timing_future(ops)
+
+    def refine_locals(self, level_start_target_or_target_parent_box_nrs,
+            target_or_target_parent_boxes, local_exps):
+        ops = 0
+
+        for target_lev in range(1, self.tree.nlevels):
+            start, stop = level_start_target_or_target_parent_box_nrs[
+                    target_lev:target_lev+2]
+            for ibox in target_or_target_parent_boxes[start:stop]:
+                local_exps[ibox] += local_exps[self.tree.box_parent_ids[ibox]]
+                ops += 1
+
+        return local_exps, self.timing_future(ops)
+
+    def eval_locals(self, level_start_target_box_nrs, target_boxes, local_exps):
+        pot = self.output_zeros()
+        ops = 0
+
+        for ibox in target_boxes:
+            tgt_pslice = self._get_target_slice(ibox)
+            ops += tgt_pslice.stop - tgt_pslice.start
+            pot[tgt_pslice] += local_exps[ibox]
+
+        return pot, self.timing_future(ops)
+
+    def finalize_potentials(self, potentials):
+        return potentials
+
+# }}}
+
 # vim: foldmethod=marker:filetype=pyopencl
diff --git a/test/test_fmm.py b/test/test_fmm.py
index cb0b061fc46a46aeefb47dc48460e9f13fb6c49b..80417212e79b0d9aa12e8fd6d1caed8cba954b47 100644
--- a/test/test_fmm.py
+++ b/test/test_fmm.py
@@ -36,7 +36,8 @@ from boxtree.tools import ( # noqa: F401
         make_normal_particle_array as p_normal,
         make_surface_particle_array as p_surface,
         make_uniform_particle_array as p_uniform,
-        particle_array_to_host)
+        particle_array_to_host,
+        ConstantOneExpansionWrangler)
 
 import logging
 logger = logging.getLogger(__name__)
@@ -44,186 +45,13 @@ logger = logging.getLogger(__name__)
 
 # {{{ fmm interaction completeness test
 
-from boxtree.tools import return_timing_data
-
-
-class ConstantOneExpansionWrangler(object):
-    """This implements the 'analytical routines' for a Green's function that is
-    constant 1 everywhere. For 'charges' of 'ones', this should get every particle
-    a copy of the particle count.
-    """
-
-    def __init__(self, tree):
-        self.tree = tree
-
-    def multipole_expansion_zeros(self):
-        return np.zeros(self.tree.nboxes, dtype=np.float64)
-
-    local_expansion_zeros = multipole_expansion_zeros
-
-    def potential_zeros(self):
-        return np.zeros(self.tree.ntargets, dtype=np.float64)
-
-    def _get_source_slice(self, ibox):
-        pstart = self.tree.box_source_starts[ibox]
-        return slice(
-                pstart, pstart + self.tree.box_source_counts_nonchild[ibox])
-
-    def _get_target_slice(self, ibox):
-        pstart = self.tree.box_target_starts[ibox]
-        return slice(
-                pstart, pstart + self.tree.box_target_counts_nonchild[ibox])
-
-    def reorder_sources(self, source_array):
-        return source_array[self.tree.user_source_ids]
-
-    def reorder_potentials(self, potentials):
-        return potentials[self.tree.sorted_target_ids]
-
-    @return_timing_data
-    def form_multipoles(self, level_start_source_box_nrs, source_boxes, src_weights):
-        mpoles = self.multipole_expansion_zeros()
-        for ibox in source_boxes:
-            pslice = self._get_source_slice(ibox)
-            mpoles[ibox] += np.sum(src_weights[pslice])
-
-        return mpoles
-
-    @return_timing_data
-    def coarsen_multipoles(self, level_start_source_parent_box_nrs,
-            source_parent_boxes, mpoles):
-        tree = self.tree
-
-        # nlevels-1 is the last valid level index
-        # nlevels-2 is the last valid level that could have children
-        #
-        # 3 is the last relevant source_level.
-        # 2 is the last relevant target_level.
-        # (because no level 1 box will be well-separated from another)
-        for source_level in range(tree.nlevels-1, 2, -1):
-            target_level = source_level - 1
-            start, stop = level_start_source_parent_box_nrs[
-                    target_level:target_level+2]
-            for ibox in source_parent_boxes[start:stop]:
-                for child in tree.box_child_ids[:, ibox]:
-                    if child:
-                        mpoles[ibox] += mpoles[child]
-
-        return mpoles
-
-    @return_timing_data
-    def eval_direct(self, target_boxes, neighbor_sources_starts,
-            neighbor_sources_lists, src_weights):
-        pot = self.potential_zeros()
-
-        for itgt_box, tgt_ibox in enumerate(target_boxes):
-            tgt_pslice = self._get_target_slice(tgt_ibox)
-
-            src_sum = 0
-            start, end = neighbor_sources_starts[itgt_box:itgt_box+2]
-            #print "DIR: %s <- %s" % (tgt_ibox, neighbor_sources_lists[start:end])
-            for src_ibox in neighbor_sources_lists[start:end]:
-                src_pslice = self._get_source_slice(src_ibox)
-
-                src_sum += np.sum(src_weights[src_pslice])
-
-            pot[tgt_pslice] = src_sum
-
-        return pot
-
-    @return_timing_data
-    def multipole_to_local(self,
-            level_start_target_or_target_parent_box_nrs,
-            target_or_target_parent_boxes,
-            starts, lists, mpole_exps):
-        local_exps = self.local_expansion_zeros()
-
-        for itgt_box, tgt_ibox in enumerate(target_or_target_parent_boxes):
-            start, end = starts[itgt_box:itgt_box+2]
-
-            contrib = 0
-            #print tgt_ibox, "<-", lists[start:end]
-            for src_ibox in lists[start:end]:
-                contrib += mpole_exps[src_ibox]
-
-            local_exps[tgt_ibox] += contrib
-
-        return local_exps
-
-    @return_timing_data
-    def eval_multipoles(self,
-            target_boxes_by_source_level, from_sep_smaller_nonsiblings_by_level,
-            mpole_exps):
-        pot = self.potential_zeros()
-
-        for level, ssn in enumerate(from_sep_smaller_nonsiblings_by_level):
-            for itgt_box, tgt_ibox in \
-                    enumerate(target_boxes_by_source_level[level]):
-                tgt_pslice = self._get_target_slice(tgt_ibox)
-
-                contrib = 0
-
-                start, end = ssn.starts[itgt_box:itgt_box+2]
-                for src_ibox in ssn.lists[start:end]:
-                    contrib += mpole_exps[src_ibox]
-
-                pot[tgt_pslice] += contrib
-
-        return pot
-
-    @return_timing_data
-    def form_locals(self,
-            level_start_target_or_target_parent_box_nrs,
-            target_or_target_parent_boxes, starts, lists, src_weights):
-        local_exps = self.local_expansion_zeros()
-
-        for itgt_box, tgt_ibox in enumerate(target_or_target_parent_boxes):
-            start, end = starts[itgt_box:itgt_box+2]
-
-            #print "LIST 4", tgt_ibox, "<-", lists[start:end]
-            contrib = 0
-            for src_ibox in lists[start:end]:
-                src_pslice = self._get_source_slice(src_ibox)
-
-                contrib += np.sum(src_weights[src_pslice])
-
-            local_exps[tgt_ibox] += contrib
-
-        return local_exps
-
-    @return_timing_data
-    def refine_locals(self, level_start_target_or_target_parent_box_nrs,
-            target_or_target_parent_boxes, local_exps):
-
-        for target_lev in range(1, self.tree.nlevels):
-            start, stop = level_start_target_or_target_parent_box_nrs[
-                    target_lev:target_lev+2]
-            for ibox in target_or_target_parent_boxes[start:stop]:
-                local_exps[ibox] += local_exps[self.tree.box_parent_ids[ibox]]
-
-        return local_exps
-
-    @return_timing_data
-    def eval_locals(self, level_start_target_box_nrs, target_boxes, local_exps):
-        pot = self.potential_zeros()
-
-        for ibox in target_boxes:
-            tgt_pslice = self._get_target_slice(ibox)
-            pot[tgt_pslice] += local_exps[ibox]
-
-        return pot
-
-    def finalize_potentials(self, potentials):
-        return potentials
-
-
 class ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
         ConstantOneExpansionWrangler):
     def __init__(self, tree, filtered_targets):
         ConstantOneExpansionWrangler.__init__(self, tree)
         self.filtered_targets = filtered_targets
 
-    def potential_zeros(self):
+    def output_zeros(self):
         return np.zeros(self.filtered_targets.nfiltered_targets, dtype=np.float64)
 
     def _get_target_slice(self, ibox):
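
Usage note (not part of the patch): a minimal sketch of the reworked timing interface introduced above, assuming the patch is applied; the numeric values are made up for illustration.

    from boxtree.fmm import TimingResult
    from boxtree.tools import DummyTimingFuture

    # TimingResult now behaves like a read-only mapping of timing fields.
    r1 = TimingResult(wall_elapsed=1.0, process_elapsed=0.9)
    r2 = TimingResult(wall_elapsed=2.0, process_elapsed=None)

    merged = r1.merge(r2)                 # equivalently: r1 + r2
    print(merged["wall_elapsed"])         # 3.0
    print(merged.get("process_elapsed"))  # None: field dropped, since r2's value is None

    # DummyTimingFuture wraps a TimingResult; from_op_count() records an
    # operation count, as ConstantOneExpansionWrangler does via *ops_elapsed*.
    fut = DummyTimingFuture.from_op_count(42)
    print(fut.result()["ops_elapsed"])    # 42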