diff --git a/setup.py b/setup.py
index 8d0850540362ddfc3ecedf453f2ccc53bba21a1c..9457bbdc269697f61153e755a930efc9fccae41d 100644
--- a/setup.py
+++ b/setup.py
@@ -94,7 +94,7 @@ setup(name="sumpy",
       install_requires=[
           "pytools>=2018.2",
           "loo.py>=2017.2",
-          "boxtree>=2013.1",
+          "boxtree>=2018.1",
           "pytest>=2.3",
           "six",
 
diff --git a/sumpy/fmm.py b/sumpy/fmm.py
index ed2a1e4bd35b07c4bb1f601c6b850044fdf5539a..17d52eedccc3804394ef1907393d4ad87786a493 100644
--- a/sumpy/fmm.py
+++ b/sumpy/fmm.py
@@ -145,6 +145,53 @@ class SumpyExpansionWranglerCodeContainer(object):
 # }}}
 
 
+# {{{ timing future
+
+_SECONDS_PER_NANOSECOND = 1e-9
+
+
+class UnableToCollectTimingData(UserWarning):
+    pass
+
+
+class SumpyTimingFuture(object):
+
+    def __init__(self, queue, events):
+        self.queue = queue
+        self.events = events
+
+    @memoize_method
+    def result(self):
+        from boxtree.fmm import TimingResult
+
+        if not self.queue.properties & cl.command_queue_properties.PROFILING_ENABLE:
+            from warnings import warn
+            warn(
+                    "Profiling was not enabled in the command queue. "
+                    "Timing data will not be collected.",
+                    category=UnableToCollectTimingData,
+                    stacklevel=3)
+            return TimingResult(wall_elapsed=None, process_elapsed=None)
+
+        if self.events:  # clWaitForEvents rejects an empty event list
+            cl.wait_for_events(self.events)
+
+        result = sum(
+                (event.profile.end - event.profile.start)
+                * _SECONDS_PER_NANOSECOND
+                for event in self.events)
+
+        return TimingResult(wall_elapsed=result, process_elapsed=None)
+
+    def done(self):
+        return all(
+                event.get_info(cl.event_info.COMMAND_EXECUTION_STATUS)
+                == cl.command_execution_status.COMPLETE
+                for event in self.events)
+
+# }}}
+
+
 # {{{ expansion wrangler
 
 class SumpyExpansionWrangler(object):
@@ -175,6 +222,7 @@ class SumpyExpansionWrangler(object):
         self.code = code_container
         self.queue = queue
         self.tree = tree
+        self.issued_timing_data_warning = False
 
         self.dtype = dtype
 
@@ -305,6 +353,8 @@ class SumpyExpansionWrangler(object):
         kwargs = self.extra_kwargs.copy()
         kwargs.update(self.box_source_list_kwargs())
 
+        events = []
+
         for lev in range(self.tree.nlevels):
             p2m = self.code.p2m(self.level_orders[lev])
             start, stop = level_start_source_box_nrs[lev:lev+2]
@@ -325,10 +375,11 @@ class SumpyExpansionWrangler(object):
                     rscale=level_to_rscale(self.tree, lev),
 
                     **kwargs)
+            events.append(evt)
 
             assert mpoles_res is mpoles_view
 
-        return mpoles
+        return (mpoles, SumpyTimingFuture(self.queue, events))
 
     def coarsen_multipoles(self,
             level_start_source_parent_box_nrs,
@@ -336,7 +387,7 @@ class SumpyExpansionWrangler(object):
             mpoles):
         tree = self.tree
 
-        evt = None
+        events = []
 
         # nlevels-1 is the last valid level index
         # nlevels-2 is the last valid level that could have children
@@ -378,12 +429,14 @@ class SumpyExpansionWrangler(object):
                     tgt_rscale=level_to_rscale(self.tree, target_level),
 
                     **self.kernel_extra_kwargs)
+            events.append(evt)
+
             assert mpoles_res is target_mpoles_view
 
-        if evt is not None:
-            mpoles.add_event(evt)
+        if events:
+            mpoles.add_event(events[-1])
 
-        return mpoles
+        return (mpoles, SumpyTimingFuture(self.queue, events))
 
     def eval_direct(self, target_boxes, source_box_starts,
             source_box_lists, src_weights):
@@ -394,6 +447,8 @@ class SumpyExpansionWrangler(object):
         kwargs.update(self.box_source_list_kwargs())
         kwargs.update(self.box_target_list_kwargs())
 
+        events = []
+
         evt, pot_res = self.code.p2p()(self.queue,
                 target_boxes=target_boxes,
                 source_box_starts=source_box_starts,
@@ -402,12 +457,13 @@ class SumpyExpansionWrangler(object):
                 result=pot,
 
                 **kwargs)
+        events.append(evt)
 
         for pot_i, pot_res_i in zip(pot, pot_res):
             assert pot_i is pot_res_i
             pot_i.add_event(evt)
 
-        return pot
+        return (pot, SumpyTimingFuture(self.queue, events))
 
     def multipole_to_local(self,
             level_start_target_box_nrs,
@@ -415,6 +471,8 @@ class SumpyExpansionWrangler(object):
             mpole_exps):
         local_exps = self.local_expansion_zeros()
 
+        events = []
+
         for lev in range(self.tree.nlevels):
             start, stop = level_start_target_box_nrs[lev:lev+2]
             if start == stop:
@@ -445,8 +503,9 @@ class SumpyExpansionWrangler(object):
                     tgt_rscale=level_to_rscale(self.tree, lev),
 
                     **self.kernel_extra_kwargs)
+            events.append(evt)
 
-        return local_exps
+        return (local_exps, SumpyTimingFuture(self.queue, events))
 
     def eval_multipoles(self,
             target_boxes_by_source_level, source_boxes_by_level, mpole_exps):
@@ -455,9 +514,10 @@ class SumpyExpansionWrangler(object):
         kwargs = self.kernel_extra_kwargs.copy()
         kwargs.update(self.box_target_list_kwargs())
 
+        events = []
+
         wait_for = mpole_exps.events
 
-        has_evt = False
         for isrc_level, ssn in enumerate(source_boxes_by_level):
             if len(target_boxes_by_source_level[isrc_level]) == 0:
                 continue
@@ -484,19 +544,18 @@ class SumpyExpansionWrangler(object):
                     wait_for=wait_for,
 
                     **kwargs)
+            events.append(evt)
 
-            has_evt = True
             wait_for = [evt]
 
             for pot_i, pot_res_i in zip(pot, pot_res):
                 assert pot_i is pot_res_i
 
-        if has_evt:
+        if events:
             for pot_i in pot:
-                # Intentionally only adding the last event.
-                pot_i.add_event(evt)
+                pot_i.add_event(events[-1])
 
-        return pot
+        return (pot, SumpyTimingFuture(self.queue, events))
 
     def form_locals(self,
             level_start_target_or_target_parent_box_nrs,
@@ -506,6 +565,8 @@ class SumpyExpansionWrangler(object):
         kwargs = self.extra_kwargs.copy()
         kwargs.update(self.box_source_list_kwargs())
 
+        events = []
+
         for lev in range(self.tree.nlevels):
             start, stop = \
                     level_start_target_or_target_parent_box_nrs[lev:lev+2]
@@ -531,15 +592,19 @@ class SumpyExpansionWrangler(object):
                     rscale=level_to_rscale(self.tree, lev),
 
                     **kwargs)
+            events.append(evt)
 
             assert result is target_local_exps_view
 
-        return local_exps
+        return (local_exps, SumpyTimingFuture(self.queue, events))
 
     def refine_locals(self,
             level_start_target_or_target_parent_box_nrs,
             target_or_target_parent_boxes,
             local_exps):
+
+        events = []
+
         for target_lev in range(1, self.tree.nlevels):
             start, stop = level_start_target_or_target_parent_box_nrs[
                     target_lev:target_lev+2]
@@ -570,12 +635,13 @@ class SumpyExpansionWrangler(object):
                     tgt_rscale=level_to_rscale(self.tree, target_lev),
 
                     **self.kernel_extra_kwargs)
+            events.append(evt)
 
             assert local_exps_res is target_local_exps_view
 
         local_exps.add_event(evt)
 
-        return local_exps
+        return (local_exps, SumpyTimingFuture(self.queue, events))
 
     def eval_locals(self, level_start_target_box_nrs, target_boxes, local_exps):
         pot = self.output_zeros()
@@ -583,6 +649,8 @@ class SumpyExpansionWrangler(object):
         kwargs = self.kernel_extra_kwargs.copy()
         kwargs.update(self.box_target_list_kwargs())
 
+        events = []
+
         for lev in range(self.tree.nlevels):
             start, stop = level_start_target_box_nrs[lev:lev+2]
             if start == stop:
@@ -606,11 +674,12 @@ class SumpyExpansionWrangler(object):
                     rscale=level_to_rscale(self.tree, lev),
 
                     **kwargs)
+            events.append(evt)
 
             for pot_i, pot_res_i in zip(pot, pot_res):
                 assert pot_i is pot_res_i
 
-        return pot
+        return (pot, SumpyTimingFuture(self.queue, events))
 
     def finalize_potentials(self, potentials):
         return potentials
diff --git a/test/test_fmm.py b/test/test_fmm.py
index 0331db6c696043b2efb8fe27286a2d48cbd16b59..71e3f044432b4bd612337bdd55d56e421540bcc4 100644
--- a/test/test_fmm.py
+++ b/test/test_fmm.py
@@ -234,6 +234,62 @@ def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class):
     pconv_verifier()
 
 
+def test_sumpy_fmm_timing_data_collection(ctx_getter):
+    logging.basicConfig(level=logging.INFO)
+
+    ctx = ctx_getter()
+    queue = cl.CommandQueue(
+            ctx,
+            properties=cl.command_queue_properties.PROFILING_ENABLE)
+
+    nsources = 500
+    dtype = np.float64
+
+    from boxtree.tools import (
+            make_normal_particle_array as p_normal)
+
+    knl = LaplaceKernel(2)
+    local_expn_class = VolumeTaylorLocalExpansion
+    mpole_expn_class = VolumeTaylorMultipoleExpansion
+    order = 1
+
+    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)
+
+    from boxtree import TreeBuilder
+    tb = TreeBuilder(ctx)
+
+    tree, _ = tb(queue, sources,
+            max_particles_in_box=30, debug=True)
+
+    from boxtree.traversal import FMMTraversalBuilder
+    tbuild = FMMTraversalBuilder(ctx)
+    trav, _ = tbuild(queue, tree, debug=True)
+
+    from pyopencl.clrandom import PhiloxGenerator
+    rng = PhiloxGenerator(ctx)
+    weights = rng.uniform(queue, nsources, dtype=np.float64)
+
+    out_kernels = [knl]
+
+    from functools import partial
+
+    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
+    wcc = SumpyExpansionWranglerCodeContainer(
+            ctx,
+            partial(mpole_expn_class, knl),
+            partial(local_expn_class, knl),
+            out_kernels)
+
+    wrangler = wcc.get_wrangler(queue, tree, dtype,
+            fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order)
+    from boxtree.fmm import drive_fmm
+
+    timing_data = {}
+    pot, = drive_fmm(trav, wrangler, weights, timing_data=timing_data)
+    print(timing_data)
+    assert timing_data
+
+
 def test_sumpy_fmm_exclude_self(ctx_getter):
     logging.basicConfig(level=logging.INFO)
 