From 8052364ac29dcfee8099b88dfd6728907eccf60b Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 6 Aug 2018 15:35:21 -0500 Subject: [PATCH 01/20] Change the index style of List 4 close to `target_boxes`, similar to the other close lists. This makes the indexing for the "close" lists more consistent, resulting in fewer surprises. See inducer/pytential#104 --- boxtree/traversal.py | 404 +++++++++++++++++++++++++-------------- boxtree/version.py | 2 +- boxtree/visualization.py | 2 +- doc/misc.rst | 7 +- 4 files changed, 265 insertions(+), 150 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 9f032a9..fa65c16 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -23,7 +23,7 @@ THE SOFTWARE. """ import numpy as np -from pytools import Record, memoize_method, memoize_in +from pytools import Record, memoize_method import pyopencl as cl import pyopencl.array # noqa import pyopencl.cltypes # noqa @@ -1163,6 +1163,188 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) # }}} +# {{{ list merger + +LIST_MERGER_TEMPLATE = ElementwiseTemplate( + arguments=r"""//CL:mako// + /* input: */ + + box_id_t *output_to_input_box, + + %for ilist in range(nlists): + box_id_t *list${ilist}_starts, + %endfor + + %if not write_counts: + %for ilist in range(nlists): + const box_id_t *list${ilist}_lists, + %endfor + const box_id_t *new_starts, + %endif + + /* output: */ + + %if not write_counts: + box_id_t *new_lists, + %else: + box_id_t *new_counts, + %endif + """, + + operation=r"""//CL:mako// + /* Compute output and input indices. */ + const box_id_t ioutput_box = i; + const box_id_t ibox = output_to_input_box[ioutput_box]; + + /* Count the size of the input at the current index. */ + %for ilist in range(nlists): + const box_id_t list${ilist}_start = list${ilist}_starts[ibox]; + const box_id_t list${ilist}_count = + list${ilist}_starts[ibox + 1] - list${ilist}_start; + %endfor + + /* Update the counts or copy the elements. 
*/ + %if write_counts: + if (ioutput_box == 0) + new_counts[0] = 0; + + new_counts[ioutput_box + 1] = + %for ilist in range(nlists): + + list${ilist}_count + %endfor + ; + %else: + box_id_t cur_idx = new_starts[ioutput_box]; + + %for ilist in range(nlists): + for (box_id_t j = 0; j < list${ilist}_count; ++j) + { + new_lists[cur_idx++] = + list${ilist}_lists[list${ilist}_start + j]; + } + %endfor + %endif + """, + + name="merge_lists") + + +class _IndexStyle: + TARGET_BOXES = 0 + TARGET_OR_TARGET_PARENT_BOXES = 1 + + +class _ListMerger(object): + """Utility class for combining box lists optionally changing indexing style.""" + + def __init__(self, context, box_id_dtype): + self.context = context + self.box_id_dtype = box_id_dtype + + @memoize_method + def get_list_merger_kernel(self, nlists, write_counts): + """ + :arg input_index_style: A :class:`_IndexStyle` + :arg output_index_style: A :class:`_IndexStyle` + :arg write_counts: A :class:`bool`, indicating whether to generate a + kernel that produces box counts or box lists + """ + assert nlists >= 1 + + return LIST_MERGER_TEMPLATE.build( + self.context, + type_aliases=( + ("box_id_t", self.box_id_dtype), + ), + var_values=( + ("nlists", nlists), + ("write_counts", write_counts), + )) + + def __call__(self, queue, input_starts, input_lists, input_index_style, + output_index_style, target_boxes, target_or_target_parent_boxes, + nboxes, debug=False, wait_for=[]): + """ + :arg input_starts: Starts arrays of input + :arg input_lists: Lists arrays of input + :arg input_index_style: A tuple of :class:`_IndexStyle` + :arg output_index_style: A :class:`_IndexStyle` + :returns: A pair *results_dict, event*, where *results_dict* + contains entries *starts* and *lists* + """ + + if ( + output_index_style == _IndexStyle.TARGET_OR_TARGET_PARENT_BOXES + and input_index_style == _IndexStyle.TARGET_BOXES): + raise ValueError( + "unsupported: merging a list indexed by target boxes " + "into a list indexed by target or target parent 
boxes") + + ntarget_boxes = len(target_boxes) + ntarget_or_ntarget_parent_boxes = len(target_or_target_parent_boxes) + + output_size = (ntarget_boxes + if output_index_style == _IndexStyle.TARGET_BOXES + else ntarget_or_ntarget_parent_boxes) + + if ( + input_index_style == _IndexStyle.TARGET_OR_TARGET_PARENT_BOXES + and output_index_style == _IndexStyle.TARGET_BOXES): + from boxtree.tools import reverse_index_array + target_or_target_parent_boxes_from_all_boxes = reverse_index_array( + target_or_target_parent_boxes, target_size=nboxes, + queue=queue) + target_or_target_parent_boxes_from_target_boxes = cl.array.take( + target_or_target_parent_boxes_from_all_boxes, + target_boxes, queue=queue) + + output_to_input_box = target_or_target_parent_boxes_from_target_boxes + else: + output_to_input_box = cl.array.arange( + queue, output_size, dtype=self.box_id_dtype) + + new_counts = cl.array.empty(queue, output_size+1, self.box_id_dtype) + + assert len(input_starts) == len(input_lists) + nlists = len(input_starts) + + evt = self.get_list_merger_kernel(nlists, True)(*( + # input: + (output_to_input_box,) + + input_starts + # output: + + (new_counts,)), + range=slice(output_size), + queue=queue, + wait_for=wait_for) + + new_starts = cl.array.cumsum(new_counts) + del new_counts + + new_lists = cl.array.empty( + queue, + int(new_starts[-1].get()), + self.box_id_dtype) + + new_lists.fill(999999999) + + evt = self.get_list_merger_kernel(nlists, False)(*( + # input: + (output_to_input_box,) + + input_starts + + input_lists + + (new_starts,) + # output: + + (new_lists,)), + range=slice(output_size), + queue=queue, + wait_for=[evt]) + + return dict(starts=new_starts, lists=new_lists), evt + +# }}} + + # {{{ traversal info (output) class FMMTraversalInfo(DeviceDataRecord): @@ -1359,7 +1541,7 @@ class FMMTraversalInfo(DeviceDataRecord): which boxes are used with the interaction list entries of :attr:`from_sep_smaller_by_level`. 
``target_boxes_sep_smaller_by_source_level[i]`` has length - ``from_sep_smaller_by_level[i].num_nonempty_lists`. + ``from_sep_smaller_by_level[i].num_nonempty_lists``. .. attribute:: from_sep_smaller_by_level @@ -1403,7 +1585,10 @@ class FMMTraversalInfo(DeviceDataRecord): interactions between boxes that would ordinarily be handled through "List 4", but must be evaluated specially/directly because of :ref:`extent`. - Indexed like :attr:`target_or_target_parent_boxes`. See :ref:`csr`. + *from_sep_bigger_starts* is indexed like + :attr:`target_or_target_parent_boxes`. Similar to the other "close" lists, + *from_sep_close_bigger_starts* is indexed like :attr:`target_boxes`. See + :ref:`csr`. .. attribute:: from_sep_bigger_starts @@ -1415,11 +1600,17 @@ class FMMTraversalInfo(DeviceDataRecord): .. attribute:: from_sep_close_bigger_starts - ``box_id_t [ntarget_or_target_parent_boxes+1]`` (or *None*) + ``box_id_t [ntarget_boxes+1]`` (or *None*) .. attribute:: from_sep_close_bigger_lists ``box_id_t [*]`` (or *None*) + + .. versionchanged:: 2018.2 + + Changed index style of *from_sep_close_bigger_starts* from + :attr:`target_or_target_parent_boxes` to :attr:`target_boxes`. + """ # {{{ "close" list merging -> "unified list 1" @@ -1432,149 +1623,38 @@ class FMMTraversalInfo(DeviceDataRecord): *None*. 
""" - from boxtree.tools import reverse_index_array - target_or_target_parent_boxes_from_all_boxes = reverse_index_array( - self.target_or_target_parent_boxes, target_size=self.tree.nboxes, - queue=queue) - target_or_target_parent_boxes_from_tgt_boxes = cl.array.take( - target_or_target_parent_boxes_from_all_boxes, - self.target_boxes, queue=queue) - - del target_or_target_parent_boxes_from_all_boxes - - @memoize_in(self, "merge_close_lists_kernel") - def get_new_nb_sources_knl(write_counts): - from pyopencl.elementwise import ElementwiseTemplate - return ElementwiseTemplate("""//CL:mako// - /* input: */ - box_id_t *target_or_target_parent_boxes_from_tgt_boxes, - box_id_t *neighbor_source_boxes_starts, - box_id_t *from_sep_close_smaller_starts, - box_id_t *from_sep_close_bigger_starts, - - %if not write_counts: - box_id_t *neighbor_source_boxes_lists, - box_id_t *from_sep_close_smaller_lists, - box_id_t *from_sep_close_bigger_lists, - - box_id_t *new_neighbor_source_boxes_starts, - %endif - - /* output: */ - - %if write_counts: - box_id_t *new_neighbor_source_boxes_counts, - %else: - box_id_t *new_neighbor_source_boxes_lists, - %endif - """, - """//CL:mako// - box_id_t itgt_box = i; - box_id_t itarget_or_target_parent_box = - target_or_target_parent_boxes_from_tgt_boxes[itgt_box]; - - box_id_t neighbor_source_boxes_start = - neighbor_source_boxes_starts[itgt_box]; - box_id_t neighbor_source_boxes_count = - neighbor_source_boxes_starts[itgt_box + 1] - - neighbor_source_boxes_start; - - box_id_t from_sep_close_smaller_start = - from_sep_close_smaller_starts[itgt_box]; - box_id_t from_sep_close_smaller_count = - from_sep_close_smaller_starts[itgt_box + 1] - - from_sep_close_smaller_start; - - box_id_t from_sep_close_bigger_start = - from_sep_close_bigger_starts[itarget_or_target_parent_box]; - box_id_t from_sep_close_bigger_count = - from_sep_close_bigger_starts[itarget_or_target_parent_box + 1] - - from_sep_close_bigger_start; - - %if write_counts: - if (itgt_box == 
0) - new_neighbor_source_boxes_counts[0] = 0; - - new_neighbor_source_boxes_counts[itgt_box + 1] = - neighbor_source_boxes_count - + from_sep_close_smaller_count - + from_sep_close_bigger_count - ; - %else: - - box_id_t cur_idx = new_neighbor_source_boxes_starts[itgt_box]; - - #define COPY_FROM(NAME) \ - for (box_id_t i = 0; i < NAME##_count; ++i) \ - new_neighbor_source_boxes_lists[cur_idx++] = \ - NAME##_lists[NAME##_start+i]; - - COPY_FROM(neighbor_source_boxes) - COPY_FROM(from_sep_close_smaller) - COPY_FROM(from_sep_close_bigger) - - %endif - """).build( - queue.context, - type_aliases=( - ("box_id_t", self.tree.box_id_dtype), - ), - var_values=( - ("write_counts", write_counts), - ) - ) - - ntarget_boxes = len(self.target_boxes) - new_neighbor_source_boxes_counts = cl.array.empty( - queue, ntarget_boxes+1, self.tree.box_id_dtype) - get_new_nb_sources_knl(True)( - # input: - target_or_target_parent_boxes_from_tgt_boxes, - self.neighbor_source_boxes_starts, - self.from_sep_close_smaller_starts, - self.from_sep_close_bigger_starts, - - # output: - new_neighbor_source_boxes_counts, - range=slice(ntarget_boxes), - queue=queue) - - new_neighbor_source_boxes_starts = cl.array.cumsum( - new_neighbor_source_boxes_counts) - del new_neighbor_source_boxes_counts - - new_neighbor_source_boxes_lists = cl.array.empty( - queue, - int(new_neighbor_source_boxes_starts[ntarget_boxes].get()), - self.tree.box_id_dtype) - - new_neighbor_source_boxes_lists.fill(999999999) - - get_new_nb_sources_knl(False)( - # input: - target_or_target_parent_boxes_from_tgt_boxes, - - self.neighbor_source_boxes_starts, - self.from_sep_close_smaller_starts, - self.from_sep_close_bigger_starts, - self.neighbor_source_boxes_lists, - self.from_sep_close_smaller_lists, - self.from_sep_close_bigger_lists, - - new_neighbor_source_boxes_starts, - - # output: - new_neighbor_source_boxes_lists, - range=slice(ntarget_boxes), - queue=queue) + list_merger = _ListMerger(queue.context, self.tree.box_id_dtype) + + 
result, evt = ( + list_merger( + queue, + # starts + (self.neighbor_source_boxes_starts, + self.from_sep_close_smaller_starts, + self.from_sep_close_bigger_starts), + # lists + (self.neighbor_source_boxes_lists, + self.from_sep_close_smaller_lists, + self.from_sep_close_bigger_lists), + # input index styles + _IndexStyle.TARGET_BOXES, + # output index style + _IndexStyle.TARGET_BOXES, + # box and tree data + self.target_boxes, + self.target_or_target_parent_boxes, + self.tree.nboxes, + debug)) + + cl.wait_for_events([evt]) return self.copy( - neighbor_source_boxes_starts=new_neighbor_source_boxes_starts, - neighbor_source_boxes_lists=new_neighbor_source_boxes_lists, - from_sep_close_smaller_starts=None, - from_sep_close_smaller_lists=None, - from_sep_close_bigger_starts=None, - from_sep_close_bigger_lists=None) + neighbor_source_boxes_starts=result["starts"], + neighbor_source_boxes_lists=result["lists"], + from_sep_close_smaller_starts=None, + from_sep_close_smaller_lists=None, + from_sep_close_bigger_starts=None, + from_sep_close_bigger_lists=None) # }}} @@ -2149,14 +2229,44 @@ class FMMTraversalBuilder: from_sep_bigger = result["from_sep_bigger"] if with_extent: - from_sep_close_bigger_starts = result["from_sep_close_bigger"].starts - from_sep_close_bigger_lists = result["from_sep_close_bigger"].lists + # These are indexed by target_or_target_parent boxes; we rewrite + # them to be indexed by target_boxes. 
+ from_sep_close_bigger_starts_raw = result["from_sep_close_bigger"].starts + from_sep_close_bigger_lists_raw = result["from_sep_close_bigger"].lists + + list_merger = _ListMerger(queue.context, tree.box_id_dtype) + result, evt = list_merger( + queue, + # starts + (from_sep_close_bigger_starts_raw,), + # lists + (from_sep_close_bigger_lists_raw,), + # input index style + _IndexStyle.TARGET_OR_TARGET_PARENT_BOXES, + # output index style + _IndexStyle.TARGET_BOXES, + # box and tree data + target_boxes, + target_or_target_parent_boxes, + tree.nboxes, + debug, + wait_for=wait_for) + + wait_for = [evt] + + del from_sep_close_bigger_starts_raw + del from_sep_close_bigger_lists_raw + + from_sep_close_bigger_starts = result["starts"] + from_sep_close_bigger_lists = result["lists"] else: from_sep_close_bigger_starts = None from_sep_close_bigger_lists = None # }}} + # }}} + if self.well_sep_is_n_away == 1: colleagues_starts = same_level_non_well_sep_boxes.starts colleagues_lists = same_level_non_well_sep_boxes.lists diff --git a/boxtree/version.py b/boxtree/version.py index aac0098..f5c7ef5 100644 --- a/boxtree/version.py +++ b/boxtree/version.py @@ -1,2 +1,2 @@ -VERSION = (2018, 1) +VERSION = (2018, 2) VERSION_TEXT = ".".join(str(i) for i in VERSION) diff --git a/boxtree/visualization.py b/boxtree/visualization.py index c487a96..e5fb19d 100644 --- a/boxtree/visualization.py +++ b/boxtree/visualization.py @@ -268,7 +268,7 @@ def draw_box_lists(tree_plotter, traversal, ibox): _draw_box_list(tree_plotter, ibox, traversal.from_sep_close_bigger_starts, traversal.from_sep_close_bigger_lists, - key_to_box=traversal.target_or_target_parent_boxes, + key_to_box=traversal.target_boxes, facecolor="purple", hatch=".") # }}} diff --git a/doc/misc.rst b/doc/misc.rst index 1086d8b..a4c008f 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -24,13 +24,18 @@ for instructions. User-visible Changes ==================== -Version 2018.1 +Version 2018.2 -------------- .. 
note:: This version is currently under development. You can get snapshots from boxtree's `git repository `_ +* Changed index style of the *from_sep_close_bigger_starts* interaction list. + +Version 2018.1 +-------------- + * Added *timing_data* parameter to FMM driver. Version 2013.1 -- GitLab From 0703ceab599b3bf744775527b89a219f1dcb0eff Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 6 Aug 2018 15:38:15 -0500 Subject: [PATCH 02/20] Fix folding --- boxtree/traversal.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index fa65c16..227c2cc 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -2265,8 +2265,6 @@ class FMMTraversalBuilder: # }}} - # }}} - if self.well_sep_is_n_away == 1: colleagues_starts = same_level_non_well_sep_boxes.starts colleagues_lists = same_level_non_well_sep_boxes.lists -- GitLab From eef6c91f2d22328288cdedd12bf399690b7148bb Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 6 Aug 2018 16:08:50 -0500 Subject: [PATCH 03/20] Fix indexing style in FMM --- boxtree/fmm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/fmm.py b/boxtree/fmm.py index 54a1649..ecbe769 100644 --- a/boxtree/fmm.py +++ b/boxtree/fmm.py @@ -150,7 +150,7 @@ def drive_fmm(traversal, expansion_wrangler, src_weights, timing_data=None): local_result, timing_future = wrangler.form_locals( traversal.level_start_target_or_target_parent_box_nrs, - traversal.target_or_target_parent_boxes, + traversal.target_boxes, traversal.from_sep_bigger_starts, traversal.from_sep_bigger_lists, src_weights) -- GitLab From 3515792f9bf1885f55289b489340bfb3791ae467 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 6 Aug 2018 16:10:01 -0500 Subject: [PATCH 04/20] Revert "Fix indexing style in FMM" This reverts commit eef6c91f2d22328288cdedd12bf399690b7148bb. 
--- boxtree/fmm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/fmm.py b/boxtree/fmm.py index ecbe769..54a1649 100644 --- a/boxtree/fmm.py +++ b/boxtree/fmm.py @@ -150,7 +150,7 @@ def drive_fmm(traversal, expansion_wrangler, src_weights, timing_data=None): local_result, timing_future = wrangler.form_locals( traversal.level_start_target_or_target_parent_box_nrs, - traversal.target_boxes, + traversal.target_or_target_parent_boxes, traversal.from_sep_bigger_starts, traversal.from_sep_bigger_lists, src_weights) -- GitLab From e00358ce2fcefafd754d84b89113024ae3bd9a62 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 6 Aug 2018 16:10:36 -0500 Subject: [PATCH 05/20] Actually fix FMM --- boxtree/fmm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/fmm.py b/boxtree/fmm.py index 54a1649..afaa93d 100644 --- a/boxtree/fmm.py +++ b/boxtree/fmm.py @@ -161,7 +161,7 @@ def drive_fmm(traversal, expansion_wrangler, src_weights, timing_data=None): if traversal.from_sep_close_bigger_starts is not None: direct_result, timing_future = wrangler.eval_direct( - traversal.target_or_target_parent_boxes, + traversal.target_boxes, traversal.from_sep_close_bigger_starts, traversal.from_sep_close_bigger_lists, src_weights) -- GitLab From 734813a626a8b532812209c520e4c30e513fa90c Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 6 Aug 2018 16:32:00 -0500 Subject: [PATCH 06/20] Rename output_size to noutput_boxes --- boxtree/traversal.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 227c2cc..a26bf51 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1283,7 +1283,7 @@ class _ListMerger(object): ntarget_boxes = len(target_boxes) ntarget_or_ntarget_parent_boxes = len(target_or_target_parent_boxes) - output_size = (ntarget_boxes + noutput_boxes = (ntarget_boxes if output_index_style == _IndexStyle.TARGET_BOXES else ntarget_or_ntarget_parent_boxes) 
@@ -1301,9 +1301,9 @@ class _ListMerger(object): output_to_input_box = target_or_target_parent_boxes_from_target_boxes else: output_to_input_box = cl.array.arange( - queue, output_size, dtype=self.box_id_dtype) + queue, noutput_boxes, dtype=self.box_id_dtype) - new_counts = cl.array.empty(queue, output_size+1, self.box_id_dtype) + new_counts = cl.array.empty(queue, noutput_boxes+1, self.box_id_dtype) assert len(input_starts) == len(input_lists) nlists = len(input_starts) @@ -1314,7 +1314,7 @@ class _ListMerger(object): + input_starts # output: + (new_counts,)), - range=slice(output_size), + range=slice(noutput_boxes), queue=queue, wait_for=wait_for) @@ -1336,7 +1336,7 @@ class _ListMerger(object): + (new_starts,) # output: + (new_lists,)), - range=slice(output_size), + range=slice(noutput_boxes), queue=queue, wait_for=[evt]) -- GitLab From a664cf4b7536f72479aead2d02862bf5e1d51b93 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 6 Aug 2018 16:32:44 -0500 Subject: [PATCH 07/20] Fix documentation --- boxtree/traversal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index a26bf51..1d52f30 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1267,7 +1267,7 @@ class _ListMerger(object): """ :arg input_starts: Starts arrays of input :arg input_lists: Lists arrays of input - :arg input_index_style: A tuple of :class:`_IndexStyle` + :arg input_index_style: A :class:`_IndexStyle` :arg output_index_style: A :class:`_IndexStyle` :returns: A pair *results_dict, event*, where *results_dict* contains entries *starts* and *lists* -- GitLab From b1251066ec9e1b05dbf1aac150d68170c6e8c35e Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 6 Aug 2018 16:33:36 -0500 Subject: [PATCH 08/20] More doc fixes --- boxtree/traversal.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 1d52f30..c42189d 100644 --- a/boxtree/traversal.py +++ 
b/boxtree/traversal.py @@ -1244,8 +1244,7 @@ class _ListMerger(object): @memoize_method def get_list_merger_kernel(self, nlists, write_counts): """ - :arg input_index_style: A :class:`_IndexStyle` - :arg output_index_style: A :class:`_IndexStyle` + :arg nlists: Number of input lists :arg write_counts: A :class:`bool`, indicating whether to generate a kernel that produces box counts or box lists """ -- GitLab From 6fe5708e3b3baa02fefbc42f8cecce1092386d21 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 7 Aug 2018 01:37:41 -0500 Subject: [PATCH 09/20] Expose ConstantOneExpansionWrangler in boxtree.tools. Also make the wrangler return op counts as timing data. --- boxtree/tools.py | 191 +++++++++++++++++++++++++++++++++++++++++++++++ test/test_fmm.py | 176 +------------------------------------------ 2 files changed, 193 insertions(+), 174 deletions(-) diff --git a/boxtree/tools.py b/boxtree/tools.py index 97cd43d..e63036e 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -598,4 +598,195 @@ class InlineBinarySearch(object): # }}} + +# {{{ constant one wrangler + +class ConstantOneExpansionWrangler(object): + """This implements the 'analytical routines' for a Green's function that is + constant 1 everywhere. For 'charges' of 'ones', this should get every particle + a copy of the particle count. 
+ """ + + def __init__(self, tree): + self.tree = tree + + def multipole_expansion_zeros(self): + return np.zeros(self.tree.nboxes, dtype=np.float64) + + local_expansion_zeros = multipole_expansion_zeros + + def potential_zeros(self): + return np.zeros(self.tree.ntargets, dtype=np.float64) + + def _get_source_slice(self, ibox): + pstart = self.tree.box_source_starts[ibox] + return slice( + pstart, pstart + self.tree.box_source_counts_nonchild[ibox]) + + def _get_target_slice(self, ibox): + pstart = self.tree.box_target_starts[ibox] + return slice( + pstart, pstart + self.tree.box_target_counts_nonchild[ibox]) + + def reorder_sources(self, source_array): + return source_array[self.tree.user_source_ids] + + def reorder_potentials(self, potentials): + return potentials[self.tree.sorted_target_ids] + + @staticmethod + def timing_future(ops): + return DummyTimingFuture(ops, ops) + + def form_multipoles(self, level_start_source_box_nrs, source_boxes, src_weights): + mpoles = self.multipole_expansion_zeros() + ops = 0 + + for ibox in source_boxes: + pslice = self._get_source_slice(ibox) + mpoles[ibox] += np.sum(src_weights[pslice]) + ops += (pslice.stop - pslice.start) + + return mpoles, self.timing_future(ops) + + def coarsen_multipoles(self, level_start_source_parent_box_nrs, + source_parent_boxes, mpoles): + tree = self.tree + ops = 0 + + # nlevels-1 is the last valid level index + # nlevels-2 is the last valid level that could have children + # + # 3 is the last relevant source_level. + # 2 is the last relevant target_level. 
+ # (because no level 1 box will be well-separated from another) + for source_level in range(tree.nlevels-1, 2, -1): + target_level = source_level - 1 + start, stop = level_start_source_parent_box_nrs[ + target_level:target_level+2] + for ibox in source_parent_boxes[start:stop]: + for child in tree.box_child_ids[:, ibox]: + if child: + mpoles[ibox] += mpoles[child] + ops += 1 + + return mpoles, self.timing_future(ops) + + def eval_direct(self, target_boxes, neighbor_sources_starts, + neighbor_sources_lists, src_weights): + pot = self.potential_zeros() + ops = 0 + + for itgt_box, tgt_ibox in enumerate(target_boxes): + tgt_pslice = self._get_target_slice(tgt_ibox) + + src_sum = 0 + nsrcs = 0 + start, end = neighbor_sources_starts[itgt_box:itgt_box+2] + #print "DIR: %s <- %s" % (tgt_ibox, neighbor_sources_lists[start:end]) + for src_ibox in neighbor_sources_lists[start:end]: + src_pslice = self._get_source_slice(src_ibox) + nsrcs += src_pslice.stop - src_pslice.start + + src_sum += np.sum(src_weights[src_pslice]) + + pot[tgt_pslice] = src_sum + ops += (tgt_pslice.stop - tgt_pslice.start) * nsrcs + + return pot, self.timing_future(ops) + + def multipole_to_local(self, + level_start_target_or_target_parent_box_nrs, + target_or_target_parent_boxes, + starts, lists, mpole_exps): + local_exps = self.local_expansion_zeros() + ops = 0 + + for itgt_box, tgt_ibox in enumerate(target_or_target_parent_boxes): + start, end = starts[itgt_box:itgt_box+2] + + contrib = 0 + #print tgt_ibox, "<-", lists[start:end] + for src_ibox in lists[start:end]: + contrib += mpole_exps[src_ibox] + ops += 1 + + local_exps[tgt_ibox] += contrib + + return local_exps, self.timing_future(ops) + + def eval_multipoles(self, + target_boxes_by_source_level, from_sep_smaller_nonsiblings_by_level, + mpole_exps): + pot = self.potential_zeros() + ops = 0 + + for level, ssn in enumerate(from_sep_smaller_nonsiblings_by_level): + for itgt_box, tgt_ibox in \ + enumerate(target_boxes_by_source_level[level]): + 
tgt_pslice = self._get_target_slice(tgt_ibox) + + contrib = 0 + + start, end = ssn.starts[itgt_box:itgt_box+2] + for src_ibox in ssn.lists[start:end]: + contrib += mpole_exps[src_ibox] + + pot[tgt_pslice] += contrib + ops += (tgt_pslice.stop - tgt_pslice.start) * (end - start) + + return pot, self.timing_future(ops) + + def form_locals(self, + level_start_target_or_target_parent_box_nrs, + target_or_target_parent_boxes, starts, lists, src_weights): + local_exps = self.local_expansion_zeros() + ops = 0 + + for itgt_box, tgt_ibox in enumerate(target_or_target_parent_boxes): + start, end = starts[itgt_box:itgt_box+2] + + #print "LIST 4", tgt_ibox, "<-", lists[start:end] + contrib = 0 + nsrcs = 0 + for src_ibox in lists[start:end]: + src_pslice = self._get_source_slice(src_ibox) + nsrcs += src_pslice.stop - src_pslice.start + + contrib += np.sum(src_weights[src_pslice]) + + local_exps[tgt_ibox] += contrib + ops += nsrcs + + return local_exps, self.timing_future(ops) + + def refine_locals(self, level_start_target_or_target_parent_box_nrs, + target_or_target_parent_boxes, local_exps): + ops = 0 + + for target_lev in range(1, self.tree.nlevels): + start, stop = level_start_target_or_target_parent_box_nrs[ + target_lev:target_lev+2] + for ibox in target_or_target_parent_boxes[start:stop]: + local_exps[ibox] += local_exps[self.tree.box_parent_ids[ibox]] + ops += 1 + + return local_exps, self.timing_future(ops) + + def eval_locals(self, level_start_target_box_nrs, target_boxes, local_exps): + pot = self.potential_zeros() + ops = 0 + + for ibox in target_boxes: + tgt_pslice = self._get_target_slice(ibox) + ops += tgt_pslice.stop - tgt_pslice.start + pot[tgt_pslice] += local_exps[ibox] + + return pot, self.timing_future(ops) + + def finalize_potentials(self, potentials): + return potentials + +# }}} + # vim: foldmethod=marker:filetype=pyopencl diff --git a/test/test_fmm.py b/test/test_fmm.py index cb0b061..94493bd 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -36,7 
+36,8 @@ from boxtree.tools import ( # noqa: F401 make_normal_particle_array as p_normal, make_surface_particle_array as p_surface, make_uniform_particle_array as p_uniform, - particle_array_to_host) + particle_array_to_host, + ConstantOneExpansionWrangler) import logging logger = logging.getLogger(__name__) @@ -44,179 +45,6 @@ logger = logging.getLogger(__name__) # {{{ fmm interaction completeness test -from boxtree.tools import return_timing_data - - -class ConstantOneExpansionWrangler(object): - """This implements the 'analytical routines' for a Green's function that is - constant 1 everywhere. For 'charges' of 'ones', this should get every particle - a copy of the particle count. - """ - - def __init__(self, tree): - self.tree = tree - - def multipole_expansion_zeros(self): - return np.zeros(self.tree.nboxes, dtype=np.float64) - - local_expansion_zeros = multipole_expansion_zeros - - def potential_zeros(self): - return np.zeros(self.tree.ntargets, dtype=np.float64) - - def _get_source_slice(self, ibox): - pstart = self.tree.box_source_starts[ibox] - return slice( - pstart, pstart + self.tree.box_source_counts_nonchild[ibox]) - - def _get_target_slice(self, ibox): - pstart = self.tree.box_target_starts[ibox] - return slice( - pstart, pstart + self.tree.box_target_counts_nonchild[ibox]) - - def reorder_sources(self, source_array): - return source_array[self.tree.user_source_ids] - - def reorder_potentials(self, potentials): - return potentials[self.tree.sorted_target_ids] - - @return_timing_data - def form_multipoles(self, level_start_source_box_nrs, source_boxes, src_weights): - mpoles = self.multipole_expansion_zeros() - for ibox in source_boxes: - pslice = self._get_source_slice(ibox) - mpoles[ibox] += np.sum(src_weights[pslice]) - - return mpoles - - @return_timing_data - def coarsen_multipoles(self, level_start_source_parent_box_nrs, - source_parent_boxes, mpoles): - tree = self.tree - - # nlevels-1 is the last valid level index - # nlevels-2 is the last 
valid level that could have children - # - # 3 is the last relevant source_level. - # 2 is the last relevant target_level. - # (because no level 1 box will be well-separated from another) - for source_level in range(tree.nlevels-1, 2, -1): - target_level = source_level - 1 - start, stop = level_start_source_parent_box_nrs[ - target_level:target_level+2] - for ibox in source_parent_boxes[start:stop]: - for child in tree.box_child_ids[:, ibox]: - if child: - mpoles[ibox] += mpoles[child] - - return mpoles - - @return_timing_data - def eval_direct(self, target_boxes, neighbor_sources_starts, - neighbor_sources_lists, src_weights): - pot = self.potential_zeros() - - for itgt_box, tgt_ibox in enumerate(target_boxes): - tgt_pslice = self._get_target_slice(tgt_ibox) - - src_sum = 0 - start, end = neighbor_sources_starts[itgt_box:itgt_box+2] - #print "DIR: %s <- %s" % (tgt_ibox, neighbor_sources_lists[start:end]) - for src_ibox in neighbor_sources_lists[start:end]: - src_pslice = self._get_source_slice(src_ibox) - - src_sum += np.sum(src_weights[src_pslice]) - - pot[tgt_pslice] = src_sum - - return pot - - @return_timing_data - def multipole_to_local(self, - level_start_target_or_target_parent_box_nrs, - target_or_target_parent_boxes, - starts, lists, mpole_exps): - local_exps = self.local_expansion_zeros() - - for itgt_box, tgt_ibox in enumerate(target_or_target_parent_boxes): - start, end = starts[itgt_box:itgt_box+2] - - contrib = 0 - #print tgt_ibox, "<-", lists[start:end] - for src_ibox in lists[start:end]: - contrib += mpole_exps[src_ibox] - - local_exps[tgt_ibox] += contrib - - return local_exps - - @return_timing_data - def eval_multipoles(self, - target_boxes_by_source_level, from_sep_smaller_nonsiblings_by_level, - mpole_exps): - pot = self.potential_zeros() - - for level, ssn in enumerate(from_sep_smaller_nonsiblings_by_level): - for itgt_box, tgt_ibox in \ - enumerate(target_boxes_by_source_level[level]): - tgt_pslice = self._get_target_slice(tgt_ibox) - - 
contrib = 0 - - start, end = ssn.starts[itgt_box:itgt_box+2] - for src_ibox in ssn.lists[start:end]: - contrib += mpole_exps[src_ibox] - - pot[tgt_pslice] += contrib - - return pot - - @return_timing_data - def form_locals(self, - level_start_target_or_target_parent_box_nrs, - target_or_target_parent_boxes, starts, lists, src_weights): - local_exps = self.local_expansion_zeros() - - for itgt_box, tgt_ibox in enumerate(target_or_target_parent_boxes): - start, end = starts[itgt_box:itgt_box+2] - - #print "LIST 4", tgt_ibox, "<-", lists[start:end] - contrib = 0 - for src_ibox in lists[start:end]: - src_pslice = self._get_source_slice(src_ibox) - - contrib += np.sum(src_weights[src_pslice]) - - local_exps[tgt_ibox] += contrib - - return local_exps - - @return_timing_data - def refine_locals(self, level_start_target_or_target_parent_box_nrs, - target_or_target_parent_boxes, local_exps): - - for target_lev in range(1, self.tree.nlevels): - start, stop = level_start_target_or_target_parent_box_nrs[ - target_lev:target_lev+2] - for ibox in target_or_target_parent_boxes[start:stop]: - local_exps[ibox] += local_exps[self.tree.box_parent_ids[ibox]] - - return local_exps - - @return_timing_data - def eval_locals(self, level_start_target_box_nrs, target_boxes, local_exps): - pot = self.potential_zeros() - - for ibox in target_boxes: - tgt_pslice = self._get_target_slice(ibox) - pot[tgt_pslice] += local_exps[ibox] - - return pot - - def finalize_potentials(self, potentials): - return potentials - - class ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder( ConstantOneExpansionWrangler): def __init__(self, tree, filtered_targets): -- GitLab From a7d23013dcc6436392398da6b04b7b0580a6bd08 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 7 Aug 2018 02:04:10 -0500 Subject: [PATCH 10/20] Can't assume a slice object is returned. 
--- boxtree/tools.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/boxtree/tools.py b/boxtree/tools.py index e63036e..e8935ee 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -645,7 +645,7 @@ class ConstantOneExpansionWrangler(object): for ibox in source_boxes: pslice = self._get_source_slice(ibox) mpoles[ibox] += np.sum(src_weights[pslice]) - ops += (pslice.stop - pslice.start) + ops += src_weights[pslice].size return mpoles, self.timing_future(ops) @@ -686,12 +686,12 @@ class ConstantOneExpansionWrangler(object): #print "DIR: %s <- %s" % (tgt_ibox, neighbor_sources_lists[start:end]) for src_ibox in neighbor_sources_lists[start:end]: src_pslice = self._get_source_slice(src_ibox) - nsrcs += src_pslice.stop - src_pslice.start + ops += src_weights[src_pslice].size src_sum += np.sum(src_weights[src_pslice]) pot[tgt_pslice] = src_sum - ops += (tgt_pslice.stop - tgt_pslice.start) * nsrcs + ops += pot[tgt_pslice].size * nsrcs return pot, self.timing_future(ops) @@ -733,7 +733,7 @@ class ConstantOneExpansionWrangler(object): contrib += mpole_exps[src_ibox] pot[tgt_pslice] += contrib - ops += (tgt_pslice.stop - tgt_pslice.start) * (end - start) + ops += pot[tgt_pslice].size * (end - start) return pot, self.timing_future(ops) @@ -751,7 +751,7 @@ class ConstantOneExpansionWrangler(object): nsrcs = 0 for src_ibox in lists[start:end]: src_pslice = self._get_source_slice(src_ibox) - nsrcs += src_pslice.stop - src_pslice.start + nsrcs += src_weights[src_pslice].size contrib += np.sum(src_weights[src_pslice]) @@ -779,8 +779,8 @@ class ConstantOneExpansionWrangler(object): for ibox in target_boxes: tgt_pslice = self._get_target_slice(ibox) - ops += tgt_pslice.stop - tgt_pslice.start pot[tgt_pslice] += local_exps[ibox] + ops += pot[tgt_pslice].size return pot, self.timing_future(ops) -- GitLab From 23df1e25914eb6c61a2fc77fcc68fe88d19f2799 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 7 Aug 2018 15:39:17 -0500 Subject: [PATCH 11/20] 
Modernize wrangler interface --- boxtree/tools.py | 12 ++++++------ test/test_fmm.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/boxtree/tools.py b/boxtree/tools.py index e8935ee..423d04c 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -615,7 +615,7 @@ class ConstantOneExpansionWrangler(object): local_expansion_zeros = multipole_expansion_zeros - def potential_zeros(self): + def output_zeros(self): return np.zeros(self.tree.ntargets, dtype=np.float64) def _get_source_slice(self, ibox): @@ -674,7 +674,7 @@ class ConstantOneExpansionWrangler(object): def eval_direct(self, target_boxes, neighbor_sources_starts, neighbor_sources_lists, src_weights): - pot = self.potential_zeros() + pot = self.output_zeros() ops = 0 for itgt_box, tgt_ibox in enumerate(target_boxes): @@ -686,7 +686,7 @@ class ConstantOneExpansionWrangler(object): #print "DIR: %s <- %s" % (tgt_ibox, neighbor_sources_lists[start:end]) for src_ibox in neighbor_sources_lists[start:end]: src_pslice = self._get_source_slice(src_ibox) - ops += src_weights[src_pslice].size + nsrcs += src_weights[src_pslice].size src_sum += np.sum(src_weights[src_pslice]) @@ -718,7 +718,7 @@ class ConstantOneExpansionWrangler(object): def eval_multipoles(self, target_boxes_by_source_level, from_sep_smaller_nonsiblings_by_level, mpole_exps): - pot = self.potential_zeros() + pot = self.output_zeros() ops = 0 for level, ssn in enumerate(from_sep_smaller_nonsiblings_by_level): @@ -764,7 +764,7 @@ class ConstantOneExpansionWrangler(object): target_or_target_parent_boxes, local_exps): ops = 0 - for target_lev in range(1, self.tree.nlevels): + for target_lev in range(self.tree.nlevels): start, stop = level_start_target_or_target_parent_box_nrs[ target_lev:target_lev+2] for ibox in target_or_target_parent_boxes[start:stop]: @@ -774,7 +774,7 @@ class ConstantOneExpansionWrangler(object): return local_exps, self.timing_future(ops) def eval_locals(self, level_start_target_box_nrs, target_boxes, local_exps): 
- pot = self.potential_zeros() + pot = self.output_zeros() ops = 0 for ibox in target_boxes: diff --git a/test/test_fmm.py b/test/test_fmm.py index 94493bd..8041721 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -51,7 +51,7 @@ class ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder( ConstantOneExpansionWrangler.__init__(self, tree) self.filtered_targets = filtered_targets - def potential_zeros(self): + def output_zeros(self): return np.zeros(self.filtered_targets.nfiltered_targets, dtype=np.float64) def _get_target_slice(self, ibox): -- GitLab From c31e3958253b1f303b4becc40472538c41789c19 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 16 Aug 2018 18:27:01 -0500 Subject: [PATCH 12/20] Change TimingResult to be a mapping-like object. --- boxtree/fmm.py | 64 +++++++++++++++++++++++++++++++++--------------- boxtree/tools.py | 16 +++++++----- 2 files changed, 54 insertions(+), 26 deletions(-) diff --git a/boxtree/fmm.py b/boxtree/fmm.py index afaa93d..3e79b2f 100644 --- a/boxtree/fmm.py +++ b/boxtree/fmm.py @@ -25,6 +25,15 @@ THE SOFTWARE. import logging logger = logging.getLogger(__name__) + +try: + # Python 3 + from collections.abc import Mapping +except ImportError: + # Python 2 + from collections import Mapping + + from pytools import ProcessLogger, Record @@ -357,16 +366,42 @@ class ExpansionWranglerInterface: # {{{ timing result -class TimingResult(Record): - """ - .. attribute:: wall_elapsed - .. attribute:: process_elapsed +class TimingResult(Mapping): + """Interface for returned timing data. + + This supports accessing timing results via a mapping interface, along with + combining results via :meth:`merge`. + + .. 
automethod:: merge """ - def __init__(self, wall_elapsed, process_elapsed): - Record.__init__(self, - wall_elapsed=wall_elapsed, - process_elapsed=process_elapsed) + def __init__(self, *args, **kwargs): + """See constructor for :class:`dict`.""" + self._mapping = dict(*args, **kwargs) + + def __getitem__(self, key): + return self._mapping[key] + + def __iter__(self): + return iter(self._mapping) + + def __len__(self): + return len(self._mapping) + + def merge(self, other): + """Merge this result with another by adding together common fields.""" + result = {} + + for key in self: + val = self.get(val) + other_val = other.get(val) + + if val is None or other_val is None: + continue + + result[key] = val + other_val + + return type(self)(result) # }}} @@ -402,17 +437,6 @@ class TimingRecorder(object): def add(self, description, future): self.futures[description].append(future) - def merge(self, result1, result2): - wall_elapsed = None - process_elapsed = None - - if None not in (result1.wall_elapsed, result2.wall_elapsed): - wall_elapsed = result1.wall_elapsed + result2.wall_elapsed - if None not in (result1.process_elapsed, result2.process_elapsed): - process_elapsed = result1.process_elapsed + result2.process_elapsed - - return TimingResult(wall_elapsed, process_elapsed) - def summarize(self): result = {} @@ -421,7 +445,7 @@ class TimingRecorder(object): timing_result = next(futures).result() for future in futures: - timing_result = self.merge(timing_result, future.result()) + timing_result = timing_result.merge(future.result()) result[description] = timing_result diff --git a/boxtree/tools.py b/boxtree/tools.py index 423d04c..bf8c671 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -517,14 +517,18 @@ class DummyTimingFuture(TimingFuture): @classmethod def from_timer(cls, timer): - return cls(timer.wall_elapsed, timer.process_elapsed) + return cls(wall_elapsed=timer.wall_elapsed, + process_elapsed=timer.process_elapsed) - def __init__(self, wall_elapsed, 
process_elapsed): - self.wall_elapsed = wall_elapsed - self.process_elapsed = process_elapsed + @classmethod + def from_op_count(cls, op_count): + return cls(ops_elapsed=op_count) + + def __init__(self, *args, **kwargs): + self._result = TimingResult(*args, **kwargs) def result(self): - return TimingResult(self.wall_elapsed, self.process_elapsed) + return self._result def done(self): return True @@ -636,7 +640,7 @@ class ConstantOneExpansionWrangler(object): @staticmethod def timing_future(ops): - return DummyTimingFuture(ops, ops) + return DummyTimingFuture.from_op_count(ops) def form_multipoles(self, level_start_source_box_nrs, source_boxes, src_weights): mpoles = self.multipole_expansion_zeros() -- GitLab From 10e3940796e6ecdc6b7dd764fcb8ae99c136130f Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 16 Aug 2018 18:31:32 -0500 Subject: [PATCH 13/20] Document what timing data the wranglers collect --- boxtree/pyfmmlib_integration.py | 4 ++++ boxtree/tools.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index c076e56..33e9dec 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -41,6 +41,10 @@ __doc__ = """Integrates :mod:`boxtree` with class FMMLibExpansionWrangler(object): """Implements the :class:`boxtree.fmm.ExpansionWranglerInterface` by using pyfmmlib. + + Timing results returned by this wrangler contains the values *wall_elapsed* + and (optionally, if supported) *process_elapsed*, which measure wall time + and process time in seconds, respectively. """ # {{{ constructor diff --git a/boxtree/tools.py b/boxtree/tools.py index bf8c671..97c7dcd 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -609,6 +609,10 @@ class ConstantOneExpansionWrangler(object): """This implements the 'analytical routines' for a Green's function that is constant 1 everywhere. For 'charges' of 'ones', this should get every particle a copy of the particle count. 
+ + Timing results returned by this wrangler contain the field *ops_elapsed*, + which counts approximately the number of floating-point operations required + by the FMM. """ def __init__(self, tree): -- GitLab From 2a4b8f2bbd7c378ca109f390bca6517b5f99ee47 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 16 Aug 2018 19:19:57 -0500 Subject: [PATCH 14/20] Simplify sentence --- boxtree/tools.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/boxtree/tools.py b/boxtree/tools.py index 97c7dcd..4d6fe43 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -611,8 +611,7 @@ class ConstantOneExpansionWrangler(object): a copy of the particle count. Timing results returned by this wrangler contain the field *ops_elapsed*, - which counts approximately the number of floating-point operations required - by the FMM. + which counts approximately the number of floating-point operations required. """ def __init__(self, tree): -- GitLab From 087b20d4590004d4a7e2c5bdfac69d2b6490dc31 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 16 Aug 2018 19:21:52 -0500 Subject: [PATCH 15/20] flake8 fixes --- boxtree/fmm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/boxtree/fmm.py b/boxtree/fmm.py index 3e79b2f..818de7a 100644 --- a/boxtree/fmm.py +++ b/boxtree/fmm.py @@ -34,7 +34,7 @@ except ImportError: from collections import Mapping -from pytools import ProcessLogger, Record +from pytools import ProcessLogger def drive_fmm(traversal, expansion_wrangler, src_weights, timing_data=None): @@ -393,8 +393,8 @@ class TimingResult(Mapping): result = {} for key in self: - val = self.get(val) - other_val = other.get(val) + val = self.get(key) + other_val = other.get(key) if val is None or other_val is None: continue -- GitLab From 22bf0c7dbea7415c195d55f8c59d8f93d4b1f847 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 16 Aug 2018 19:57:51 -0500 Subject: [PATCH 16/20] Fix syntax error --- boxtree/tools.py | 1 - 1 file changed, 1 deletion(-) diff 
--git a/boxtree/tools.py b/boxtree/tools.py index 9b1d318..54a60d3 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -982,7 +982,6 @@ class ConstantOneExpansionWrangler(object): def finalize_potentials(self, potentials): return potentials ->>>>>>> move-constant-one-wrangler-to-tools # }}} -- GitLab From 08941f5df1772b9a98af8fe8ab7ac0c84e7bc46c Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 16 Aug 2018 19:58:16 -0500 Subject: [PATCH 17/20] Update __add__ --- boxtree/fmm.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/boxtree/fmm.py b/boxtree/fmm.py index e717d66..3002888 100644 --- a/boxtree/fmm.py +++ b/boxtree/fmm.py @@ -403,11 +403,7 @@ class TimingResult(Mapping): return type(self)(result) - def __add__(self, other): - return TimingResult( - self.wall_elapsed + other.wall_elapsed, - self.process_elapsed + other.process_elapsed - ) + __add__ = merge # }}} -- GitLab From 3b9893e4b9660b4847f8bc63636ea1bdb0c413e4 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 16 Aug 2018 20:01:54 -0500 Subject: [PATCH 18/20] Update uses of TimingResult in the performance model to new interface --- boxtree/distributed/perf_model.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/boxtree/distributed/perf_model.py b/boxtree/distributed/perf_model.py index 3210303..b036b4e 100644 --- a/boxtree/distributed/perf_model.py +++ b/boxtree/distributed/perf_model.py @@ -476,9 +476,9 @@ class PerformanceModel: result = self.time_result[0] if wall_time: - dependent_value = result[y_name].wall_elapsed + dependent_value = result[y_name]["wall_elapsed"] else: - dependent_value = result[y_name].process_elapsed + dependent_value = result[y_name]["process_elapsed"] independent_value = result[x_name[0]] coeff = dependent_value / independent_value @@ -490,9 +490,9 @@ class PerformanceModel: for iresult, result in enumerate(self.time_result): if wall_time: - dependent_value[iresult] = result[y_name].wall_elapsed + 
dependent_value[iresult] = result[y_name]["wall_elapsed"] else: - dependent_value[iresult] = result[y_name].process_elapsed + dependent_value[iresult] = result[y_name]["process_elapsed"] for icol, variable_name in enumerate(x_name): coeff_matrix[iresult, icol] = result[variable_name] @@ -718,8 +718,8 @@ class PerformanceModel: elif isinstance(entry, TimingResult): current_output[field_name] = { - 'wall_elapsed': entry.wall_elapsed, - 'process_elapsed': entry.process_elapsed + 'wall_elapsed': entry.get("wall_elapsed"), + 'process_elapsed': entry.get("process_elapsed") } output.append(current_output) -- GitLab From f0e56de252b21da7d9c46ec1e27084ffe95d874f Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 16 Aug 2018 20:38:59 -0500 Subject: [PATCH 19/20] Another fix --- boxtree/distributed/perf_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/boxtree/distributed/perf_model.py b/boxtree/distributed/perf_model.py index b036b4e..34eb45c 100644 --- a/boxtree/distributed/perf_model.py +++ b/boxtree/distributed/perf_model.py @@ -688,8 +688,8 @@ class PerformanceModel: elif isinstance(entry, dict): converted_result[field_name] = TimingResult( - entry['wall_elapsed'], - entry['process_elapsed'] + wall_elapsed=entry['wall_elapsed'], + process_elapsed=entry['process_elapsed'] ) else: -- GitLab From a015815c800fdcaea234c2399249ea4042fd547d Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 16 Aug 2018 21:18:10 -0500 Subject: [PATCH 20/20] Fixes for reindexing List 4 close --- boxtree/distributed/calculation.py | 2 +- boxtree/distributed/partition.py | 2 +- boxtree/distributed/perf_model.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/boxtree/distributed/calculation.py b/boxtree/distributed/calculation.py index 107cb76..e565fe5 100644 --- a/boxtree/distributed/calculation.py +++ b/boxtree/distributed/calculation.py @@ -477,7 +477,7 @@ def calculate_pot(local_wrangler, global_wrangler, local_trav, source_weights, "('list 4 
close')") potentials = potentials + local_wrangler.eval_direct( - local_trav.target_or_target_parent_boxes, + local_trav.target_boxes, local_trav.from_sep_close_bigger_starts, local_trav.from_sep_close_bigger_lists, local_src_weights)[0] diff --git a/boxtree/distributed/partition.py b/boxtree/distributed/partition.py index ecaa77c..453b73f 100644 --- a/boxtree/distributed/partition.py +++ b/boxtree/distributed/partition.py @@ -264,7 +264,7 @@ class ResponsibleBoxesQuery(object): # Add list 4 close of responsible boxes if self.traversal.from_sep_close_bigger_starts is not None: self.add_interaction_list_boxes( - self.target_or_target_parent_boxes_dev, + self.target_boxes_dev, responsible_boxes_mask | ancestor_boxes_mask, self.from_sep_close_bigger_starts_dev, self.from_sep_close_bigger_lists_dev, diff --git a/boxtree/distributed/perf_model.py b/boxtree/distributed/perf_model.py index 34eb45c..049d541 100644 --- a/boxtree/distributed/perf_model.py +++ b/boxtree/distributed/perf_model.py @@ -229,7 +229,7 @@ class PerformanceCounter: ) if traversal.from_sep_close_bigger_starts is not None: - ndirect_src_boxes[traversal.target_or_target_parent_boxes] += ( + ndirect_src_boxes[traversal.target_boxes] += ( traversal.from_sep_close_bigger_starts[1:] - traversal.from_sep_close_bigger_starts[:-1] ) @@ -326,7 +326,7 @@ class PerformanceCounter: else: np2l = np.zeros(len(trav.target_or_target_parent_boxes), dtype=np.intp) - for itgt_box, tgt_ibox in enumerate(trav.target_or_target_parent_boxes): + for itgt_box, tgt_ibox in enumerate(trav.target_boxes): tgt_box_level = trav.tree.box_levels[tgt_ibox] ncoeffs = parameters.ncoeffs_fmm_by_level[tgt_box_level] -- GitLab