diff --git a/boxtree/fmm.py b/boxtree/fmm.py index b2dd2764eb6ae49d41e7335c65ff025dc7f23ce6..54a16495314457f9d0bc6c5884e6583c632ce1dd 100644 --- a/boxtree/fmm.py +++ b/boxtree/fmm.py @@ -25,10 +25,10 @@ THE SOFTWARE. import logging logger = logging.getLogger(__name__) -from pytools import ProcessLogger +from pytools import ProcessLogger, Record -def drive_fmm(traversal, expansion_wrangler, src_weights): +def drive_fmm(traversal, expansion_wrangler, src_weights, timing_data=None): """Top-level driver routine for a fast multipole calculation. In part, this is intended as a template for custom FMMs, in the sense that @@ -44,8 +44,12 @@ def drive_fmm(traversal, expansion_wrangler, src_weights): :class:`ExpansionWranglerInterface`. :arg src_weights: Source 'density/weights/charges'. Passed unmodified to *expansion_wrangler*. + :arg timing_data: Either *None*, or a :class:`dict` that is populated with + timing information for the stages of the algorithm (in the form of + :class:`TimingResult`), if such information is available. Returns the potentials computed by *expansion_wrangler*. + """ wrangler = expansion_wrangler @@ -53,50 +57,59 @@ def drive_fmm(traversal, expansion_wrangler, src_weights): # to the expansion wrangler and should not be passed. 
fmm_proc = ProcessLogger(logger, "qbx fmm") + recorder = TimingRecorder() src_weights = wrangler.reorder_sources(src_weights) # {{{ "Step 2.1:" Construct local multipoles - mpole_exps = wrangler.form_multipoles( + mpole_exps, timing_future = wrangler.form_multipoles( traversal.level_start_source_box_nrs, traversal.source_boxes, src_weights) + recorder.add("form_multipoles", timing_future) + # }}} # {{{ "Step 2.2:" Propagate multipoles upward - wrangler.coarsen_multipoles( + mpole_exps, timing_future = wrangler.coarsen_multipoles( traversal.level_start_source_parent_box_nrs, traversal.source_parent_boxes, mpole_exps) + recorder.add("coarsen_multipoles", timing_future) + # mpole_exps is called Phi in [1] # }}} # {{{ "Stage 3:" Direct evaluation from neighbor source boxes ("list 1") - potentials = wrangler.eval_direct( + potentials, timing_future = wrangler.eval_direct( traversal.target_boxes, traversal.neighbor_source_boxes_starts, traversal.neighbor_source_boxes_lists, src_weights) + recorder.add("eval_direct", timing_future) + # these potentials are called alpha in [1] # }}} # {{{ "Stage 4:" translate separated siblings' ("list 2") mpoles to local - local_exps = wrangler.multipole_to_local( + local_exps, timing_future = wrangler.multipole_to_local( traversal.level_start_target_or_target_parent_box_nrs, traversal.target_or_target_parent_boxes, traversal.from_sep_siblings_starts, traversal.from_sep_siblings_lists, mpole_exps) + recorder.add("multipole_to_local", timing_future) + # local_exps represents both Gamma and Delta in [1] # }}} @@ -106,59 +119,81 @@ def drive_fmm(traversal, expansion_wrangler, src_weights): # (the point of aiming this stage at particles is specifically to keep its # contribution *out* of the downward-propagating local expansions) - potentials = potentials + wrangler.eval_multipoles( + mpole_result, timing_future = wrangler.eval_multipoles( traversal.target_boxes_sep_smaller_by_source_level, traversal.from_sep_smaller_by_level, mpole_exps) + 
recorder.add("eval_multipoles", timing_future) + + potentials = potentials + mpole_result + # these potentials are called beta in [1] if traversal.from_sep_close_smaller_starts is not None: logger.debug("evaluate separated close smaller interactions directly " "('list 3 close')") - potentials = potentials + wrangler.eval_direct( + direct_result, timing_future = wrangler.eval_direct( traversal.target_boxes, traversal.from_sep_close_smaller_starts, traversal.from_sep_close_smaller_lists, src_weights) + recorder.add("eval_direct", timing_future) + + potentials = potentials + direct_result + # }}} # {{{ "Stage 6:" form locals for separated bigger source boxes ("list 4") - local_exps = local_exps + wrangler.form_locals( + local_result, timing_future = wrangler.form_locals( traversal.level_start_target_or_target_parent_box_nrs, traversal.target_or_target_parent_boxes, traversal.from_sep_bigger_starts, traversal.from_sep_bigger_lists, src_weights) + recorder.add("form_locals", timing_future) + + local_exps = local_exps + local_result + if traversal.from_sep_close_bigger_starts is not None: - potentials = potentials + wrangler.eval_direct( + direct_result, timing_future = wrangler.eval_direct( traversal.target_or_target_parent_boxes, traversal.from_sep_close_bigger_starts, traversal.from_sep_close_bigger_lists, src_weights) + recorder.add("eval_direct", timing_future) + + potentials = potentials + direct_result + # }}} # {{{ "Stage 7:" propagate local_exps downward - wrangler.refine_locals( + local_exps, timing_future = wrangler.refine_locals( traversal.level_start_target_or_target_parent_box_nrs, traversal.target_or_target_parent_boxes, local_exps) + recorder.add("refine_locals", timing_future) + # }}} # {{{ "Stage 8:" evaluate locals - potentials = potentials + wrangler.eval_locals( + local_result, timing_future = wrangler.eval_locals( traversal.level_start_target_box_nrs, traversal.target_boxes, local_exps) + recorder.add("eval_locals", timing_future) + + potentials = 
potentials + local_result + # }}} result = wrangler.reorder_potentials(potentials) @@ -167,6 +202,9 @@ def drive_fmm(traversal, expansion_wrangler, src_weights): fmm_proc.done() + if timing_data is not None: + timing_data.update(recorder.summarize()) + return result @@ -181,6 +219,13 @@ class ExpansionWranglerInterface: Will usually hold a reference (and thereby be specific to) a :class:`boxtree.Tree` instance. + + Functions that support returning timing data return a value supporting the + :class:`TimingFuture` interface. + + .. versionchanged:: 2018.1 + + Changed (a subset of) functions to return timing data. """ def multipole_expansion_zeros(self): @@ -221,6 +266,8 @@ class ExpansionWranglerInterface: containing multipole expansions in *source_boxes* due to sources with *src_weights*. All other expansions must be zero. + + :return: A pair (*mpoles*, *timing_future*). """ def coarsen_multipoles(self, level_start_source_parent_box_nrs, @@ -230,7 +277,7 @@ class ExpansionWranglerInterface: *mpole* and add the resulting expansion into the box's multipole expansion in *mpole*. - :returns: *mpoles* + :returns: A pair (*mpoles*, *timing_future*). """ def eval_direct(self, target_boxes, neighbor_sources_starts, @@ -239,7 +286,8 @@ class ExpansionWranglerInterface: neighbor sources due to *src_weights*, which use :ref:`csr` and are indexed like *target_boxes*. - :returns: a new potential array, see :meth:`output_zeros`. + :returns: A pair (*pot*, *timing_future*), where *pot* is + a new potential array, see :meth:`output_zeros`. """ def multipole_to_local(self, @@ -251,8 +299,8 @@ class ExpansionWranglerInterface: array of local expansions. *starts* and *lists* use :ref:`csr`, and *starts* is indexed like *target_or_target_parent_boxes*. - :returns: a new (local) expansion array, see - :meth:`local_expansion_zeros`. + :returns: A pair (*pot*, *timing_future*) where *pot* is + a new (local) expansion array, see :meth:`local_expansion_zeros`.
""" def eval_multipoles(self, @@ -263,7 +311,8 @@ class ExpansionWranglerInterface: *starts* and *lists* in *from_sep_smaller_by_level[i]* use :ref:`csr` and *starts* is indexed like *target_boxes_by_source_level[i]*. - :returns: a new potential array, see :meth:`output_zeros`. + :returns: A pair (*pot*, *timing_future*) where *pot* is a new potential + array, see :meth:`output_zeros`. """ def form_locals(self, @@ -275,10 +324,9 @@ class ExpansionWranglerInterface: use :ref:`csr` and *starts* is indexed like *target_or_target_parent_boxes*. - :returns: a new local expansion array, see - :meth:`local_expansion_zeros`. + :returns: A pair (*pot*, *timing_future*) where *pot* is a new + local expansion array, see :meth:`local_expansion_zeros`. """ - pass def refine_locals(self, level_start_target_or_target_parent_box_nrs, target_or_target_parent_boxes, local_exps): @@ -286,14 +334,15 @@ class ExpansionWranglerInterface: translate the box's parent's local expansion in *local_exps* and add the resulting expansion into the box's local expansion in *local_exps*. - :returns: *local_exps* + :returns: A pair (*local_exps*, *timing_future*). """ def eval_locals(self, level_start_target_box_nrs, target_boxes, local_exps): """For each box in *target_boxes*, evaluate the local expansion in *local_exps* and return a new potential array. - :returns: a new potential array, see :meth:`output_zeros`. + :returns: A pair (*pot*, *timing_future*) where *pot* is a new potential + array, see :meth:`output_zeros`. """ def finalize_potentials(self, potentials): @@ -306,4 +355,79 @@ class ExpansionWranglerInterface: # }}} +# {{{ timing result + +class TimingResult(Record): + """ + .. attribute:: wall_elapsed + .. 
attribute:: process_elapsed + """ + + def __init__(self, wall_elapsed, process_elapsed): + Record.__init__(self, + wall_elapsed=wall_elapsed, + process_elapsed=process_elapsed) + +# }}} + + +# {{{ timing future + +class TimingFuture(object): + """Returns timing data for a potentially asynchronous operation. + + .. automethod:: result + .. automethod:: done + """ + + def result(self): + """Return a :class:`TimingResult`. May block.""" + raise NotImplementedError + + def done(self): + """Return *True* if the operation is complete.""" + raise NotImplementedError + +# }}} + + +# {{{ timing recorder + +class TimingRecorder(object): + + def __init__(self): + from collections import defaultdict + self.futures = defaultdict(list) + + def add(self, description, future): + self.futures[description].append(future) + + def merge(self, result1, result2): + wall_elapsed = None + process_elapsed = None + + if None not in (result1.wall_elapsed, result2.wall_elapsed): + wall_elapsed = result1.wall_elapsed + result2.wall_elapsed + if None not in (result1.process_elapsed, result2.process_elapsed): + process_elapsed = result1.process_elapsed + result2.process_elapsed + + return TimingResult(wall_elapsed, process_elapsed) + + def summarize(self): + result = {} + + for description, futures_list in self.futures.items(): + futures = iter(futures_list) + + timing_result = next(futures).result() + for future in futures: + timing_result = self.merge(timing_result, future.result()) + + result[description] = timing_result + + return result + +# }}} + + # vim: filetype=pyopencl:fdm=marker diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index ddf7e2000b16a9195eab5f6c38ec438899c2367d..c076e56e2f47470bbb70c3acc1fcf0c7da8d7142 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -27,6 +27,7 @@ THE SOFTWARE. 
import numpy as np from pytools import memoize_method, log_process +from boxtree.tools import return_timing_data import logging logger = logging.getLogger(__name__) @@ -420,6 +421,7 @@ class FMMLibExpansionWrangler(object): } @log_process(logger) + @return_timing_data def form_multipoles(self, level_start_source_box_nrs, source_boxes, src_weights): formmp = self.get_routine("%ddformmp" + self.dp_suffix) @@ -459,6 +461,7 @@ class FMMLibExpansionWrangler(object): return mpoles @log_process(logger) + @return_timing_data def coarsen_multipoles(self, level_start_source_parent_box_nrs, source_parent_boxes, mpoles): tree = self.tree @@ -511,7 +514,10 @@ class FMMLibExpansionWrangler(object): target_mpoles_view[ ibox - target_level_start_ibox] += new_mp[..., 0].T + return mpoles + @log_process(logger) + @return_timing_data def eval_direct(self, target_boxes, neighbor_sources_starts, neighbor_sources_lists, src_weights): output = self.output_zeros() @@ -553,6 +559,7 @@ class FMMLibExpansionWrangler(object): return output @log_process(logger) + @return_timing_data def multipole_to_local(self, level_start_target_or_target_parent_box_nrs, target_or_target_parent_boxes, @@ -638,6 +645,7 @@ class FMMLibExpansionWrangler(object): return local_exps @log_process(logger) + @return_timing_data def eval_multipoles(self, target_boxes_by_source_level, sep_smaller_nonsiblings_by_level, mpole_exps): @@ -680,6 +688,7 @@ class FMMLibExpansionWrangler(object): return output @log_process(logger) + @return_timing_data def form_locals(self, level_start_target_or_target_parent_box_nrs, target_or_target_parent_boxes, starts, lists, src_weights): @@ -731,6 +740,7 @@ class FMMLibExpansionWrangler(object): return local_exps @log_process(logger) + @return_timing_data def refine_locals(self, level_start_target_or_target_parent_box_nrs, target_or_target_parent_boxes, local_exps): @@ -777,6 +787,7 @@ class FMMLibExpansionWrangler(object): return local_exps @log_process(logger) + @return_timing_data def 
eval_locals(self, level_start_target_box_nrs, target_boxes, local_exps): output = self.output_zeros() taeval = self.get_expn_eval_routine("ta") diff --git a/boxtree/tools.py b/boxtree/tools.py index b8346378d85eae6420be3338e3bf97305b253cf0..97cd43d6db37a506d6551e91b77d72f86e1f09f7 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -30,6 +30,7 @@ import pyopencl.array # noqa from pyopencl.tools import dtype_to_c_struct from mako.template import Template from pytools.obj_array import make_obj_array +from boxtree.fmm import TimingFuture, TimingResult AXIS_NAMES = ("x", "y", "z", "w") @@ -510,6 +511,51 @@ class MapValuesKernel(object): # }}} +# {{{ time recording tool + +class DummyTimingFuture(TimingFuture): + + @classmethod + def from_timer(cls, timer): + return cls(timer.wall_elapsed, timer.process_elapsed) + + def __init__(self, wall_elapsed, process_elapsed): + self.wall_elapsed = wall_elapsed + self.process_elapsed = process_elapsed + + def result(self): + return TimingResult(self.wall_elapsed, self.process_elapsed) + + def done(self): + return True + + +def return_timing_data(wrapped): + """A decorator for recording timing data for a function call. + + The decorated function returns a tuple (*retval*, *timing_future*) + where *retval* is the original return value and *timing_future* + supports the timing data future interface in :mod:`boxtree.fmm`. 
+ """ + + from pytools import ProcessTimer + + def wrapper(*args, **kwargs): + timer = ProcessTimer() + retval = wrapped(*args, **kwargs) + timer.done() + + future = DummyTimingFuture.from_timer(timer) + return (retval, future) + + from functools import update_wrapper + new_wrapper = update_wrapper(wrapper, wrapped) + + return new_wrapper + +# }}} + + # {{{ binary search from mako.template import Template diff --git a/boxtree/version.py b/boxtree/version.py index 9bf59978d17d92678cfd2768158efe7f0305c447..aac009869c50439ea6faf456d7ab641f1f8e915d 100644 --- a/boxtree/version.py +++ b/boxtree/version.py @@ -1,2 +1,2 @@ -VERSION = (2013, 1) +VERSION = (2018, 1) VERSION_TEXT = ".".join(str(i) for i in VERSION) diff --git a/doc/fmm.rst b/doc/fmm.rst index bc23d5688b69bdb65a021b5cb5aa445582f2cb03..97ea4e06a88a75e0a9626a0a26aeef468b6d0246 100644 --- a/doc/fmm.rst +++ b/doc/fmm.rst @@ -10,6 +10,10 @@ FMM driver :undoc-members: :member-order: bysource +.. autoclass:: TimingResult + +.. autoclass:: TimingFuture + Integration with PyFMMLib ------------------------- diff --git a/doc/misc.rst b/doc/misc.rst index 29226b598fc702f6db6d07aedf59e3f305e148f3..1086d8b4b284827a6adf0ff5728fa3626af16c7b 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -24,13 +24,18 @@ for instructions. User-visible Changes ==================== -Version 2013.1 +Version 2018.1 -------------- .. note:: This version is currently under development. You can get snapshots from boxtree's `git repository `_ +* Added *timing_data* parameter to FMM driver. + +Version 2013.1 +-------------- + * Initial release. .. 
_license: diff --git a/test/test_fmm.py b/test/test_fmm.py index 2eb9eb3e3a86292740a24aacd342ae2924d10050..cb0b061fc46a46aeefb47dc48460e9f13fb6c49b 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -44,6 +44,9 @@ logger = logging.getLogger(__name__) # {{{ fmm interaction completeness test +from boxtree.tools import return_timing_data + + class ConstantOneExpansionWrangler(object): """This implements the 'analytical routines' for a Green's function that is constant 1 everywhere. For 'charges' of 'ones', this should get every particle @@ -77,6 +80,7 @@ class ConstantOneExpansionWrangler(object): def reorder_potentials(self, potentials): return potentials[self.tree.sorted_target_ids] + @return_timing_data def form_multipoles(self, level_start_source_box_nrs, source_boxes, src_weights): mpoles = self.multipole_expansion_zeros() for ibox in source_boxes: @@ -85,6 +89,7 @@ class ConstantOneExpansionWrangler(object): return mpoles + @return_timing_data def coarsen_multipoles(self, level_start_source_parent_box_nrs, source_parent_boxes, mpoles): tree = self.tree @@ -104,6 +109,9 @@ class ConstantOneExpansionWrangler(object): if child: mpoles[ibox] += mpoles[child] + return mpoles + + @return_timing_data def eval_direct(self, target_boxes, neighbor_sources_starts, neighbor_sources_lists, src_weights): pot = self.potential_zeros() @@ -123,6 +131,7 @@ class ConstantOneExpansionWrangler(object): return pot + @return_timing_data def multipole_to_local(self, level_start_target_or_target_parent_box_nrs, target_or_target_parent_boxes, @@ -141,6 +150,7 @@ class ConstantOneExpansionWrangler(object): return local_exps + @return_timing_data def eval_multipoles(self, target_boxes_by_source_level, from_sep_smaller_nonsiblings_by_level, mpole_exps): @@ -161,6 +171,7 @@ class ConstantOneExpansionWrangler(object): return pot + @return_timing_data def form_locals(self, level_start_target_or_target_parent_box_nrs, target_or_target_parent_boxes, starts, lists, src_weights): @@ -180,6 
+191,7 @@ class ConstantOneExpansionWrangler(object): return local_exps + @return_timing_data def refine_locals(self, level_start_target_or_target_parent_box_nrs, target_or_target_parent_boxes, local_exps): @@ -191,6 +203,7 @@ class ConstantOneExpansionWrangler(object): return local_exps + @return_timing_data def eval_locals(self, level_start_target_box_nrs, target_boxes, local_exps): pot = self.potential_zeros() @@ -564,7 +577,11 @@ def test_pyfmmlib_fmm(ctx_getter, dims, use_dipoles, helmholtz_k): dipole_vec=dipole_vec) from boxtree.fmm import drive_fmm - pot = drive_fmm(trav, wrangler, weights) + + timing_data = {} + pot = drive_fmm(trav, wrangler, weights, timing_data=timing_data) + print(timing_data) + assert timing_data # {{{ ref fmmlib computation