From 12c3276eb10dec313bfa9a3fa0e79c157d5a8f91 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 12 Oct 2017 20:55:09 -0500 Subject: [PATCH 1/6] WIP --- boxtree/traversal.py | 259 ++++++++++++++++++++++++++----------------- 1 file changed, 156 insertions(+), 103 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index a35c60d..5fc5689 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -618,7 +618,15 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) if (sep) { - APPEND_from_sep_siblings(sib_box_id); + if (box_source_counts_cumul[sib_box_id] + >= multipole_min_nsources_cumul) + { + APPEND_from_sep_siblings(sib_box_id); + } + else + { + APPEND_from_sep_siblings_underfilled(sib_box_id); + } } } } @@ -812,7 +820,7 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t target_box_number) if (meets_sep_crit && box_source_counts_cumul[walk_box_id] - >= from_sep_smaller_min_nsources_cumul) + >= multipole_min_nsources_cumul) { if (from_sep_smaller_source_level == walk_level) APPEND_from_sep_smaller(walk_box_id); @@ -1309,6 +1317,10 @@ class FMMTraversalInfo(DeviceDataRecord): (Note: This list contains global box numbers, not indices into :attr:`source_boxes`.) + (Optionally, this can also contain source boxes smaller than the target box + which are separated by the target box's size. These boxes' ancestors did not + meet the source count threshold for inclusion in List 2.) + If :attr:`boxtree.Tree.sources_have_extent` or :attr:`boxtree.Tree.targets_have_extent`, then :attr:`from_sep_close_bigger_starts` will be non-*None*. It records @@ -1332,18 +1344,100 @@ class FMMTraversalInfo(DeviceDataRecord): .. attribute:: from_sep_close_bigger_lists ``box_id_t [*]`` (or *None*) + """ - # {{{ "close" list merging -> "unified list 1" + # {{{ list merging tools - def merge_close_lists(self, queue, debug=False): - """Return a new :class:`FMMTraversalInfo` instance with the contents of - :attr:`from_sep_close_smaller_starts` and - :attr:`from_sep_close_bigger_starts` merged into - :attr:`neighbor_source_boxes_starts` and these two attributes set to - *None*. + @memoize_method + def get_list_merger_kernel(self, + input_index_kinds, output_index_kind, write_counts): """ + :arg input_index_kinds: A tuple of elements each of which is + one of the strings *target_boxes* or *target_or_target_parent_boxes* + + :arg output_index_kind: Either *target_boxes* or + *target_or_target_parent_boxes* + """ + nlists = len(list_indexing_styles) + assert nlists >= 1 + assert all( + style in ("target_boxes", "target_or_target_parent_boxes") + for style in list_indexing_styles) + from pyopencl.elementwise import ElementwiseTemplate + return ElementwiseTemplate("""//CL:mako// + /* input: */ + box_id_t *target_or_target_parent_boxes_from_tgt_boxes, + + %for ilist in range(nlists): + box_id_t list${ilist}_starts; + %endfor + + %if not write_counts: + %for ilist in range(nlists): + const box_id_t *list${ilist}_lists; + %endfor + const box_id_t *new_starts; + %endif + + /* output: */ + + %if not write_counts: + box_id_t *new_lists, + %else: + box_id_t *new_counts, + %endif + """, + """//CL:mako// + %if output_index_kind == "target_boxes": + const box_id_t itgt_box = i; + const box_id_t itarget_or_target_parent_box = + target_or_target_parent_boxes_from_tgt_boxes[itgt_box]; + %elif output_index_kind == "target_or_target_parent_boxes": + const box_id_d itarget_or_target_parent_box = i; + %else + %endif + + %for ilist, ilist_index_kind in enumerate(input_index_kinds): + const box_id_t list${ilist}_start = + list${ilist}_starts[itgt_box]; + const box_id_t list${ilist}_count = + list${ilist}_starts[itgt_box + 1] + - list${ilist}_start; + %endfor + + %if write_counts: + if (itgt_box == 0) + new_counts[0] = 0; + + new_counts[itgt_box + 1] = + %for ilist in range(nlists): + + list${ilist}_count + %endfor + ; + %else: + box_id_t cur_idx = new_starts[itgt_box]; + + %for ilist in range(nlists): + for (box_id_t i = 0; i < list${ilist}_count; ++i) + { + new_lists[cur_idx++] = + list${ilist}_lists[list_${ilist}_start + i]; + } + %endfor + %endif + """).build( + queue.context, + type_aliases=( + ("box_id_t", self.tree.box_id_dtype), + ), + var_values=( + ("nlists", nlists), + ("write_counts", write_counts), + ) + ) + def merge_lists(self, queue, input_starts, input_lists, input_index_kinds, ouput_index_kind, debug=False): from boxtree.tools import reverse_index_array target_or_target_parent_boxes_from_all_boxes = reverse_index_array( self.target_or_target_parent_boxes, target_size=self.tree.nboxes, @@ -1354,88 +1448,6 @@ class FMMTraversalInfo(DeviceDataRecord): del target_or_target_parent_boxes_from_all_boxes - @memoize_in(self, "merge_close_lists_kernel") - def get_new_nb_sources_knl(write_counts): - from pyopencl.elementwise import ElementwiseTemplate - return ElementwiseTemplate("""//CL:mako// - /* input: */ - box_id_t *target_or_target_parent_boxes_from_tgt_boxes, - box_id_t *neighbor_source_boxes_starts, - box_id_t *from_sep_close_smaller_starts, - box_id_t *from_sep_close_bigger_starts, - - %if not write_counts: - box_id_t *neighbor_source_boxes_lists, - box_id_t *from_sep_close_smaller_lists, - box_id_t *from_sep_close_bigger_lists, - - box_id_t *new_neighbor_source_boxes_starts, - %endif - - /* output: */ - - %if write_counts: - box_id_t *new_neighbor_source_boxes_counts, - %else: - box_id_t *new_neighbor_source_boxes_lists, - %endif - """, - """//CL:mako// - box_id_t itgt_box = i; - box_id_t itarget_or_target_parent_box = - target_or_target_parent_boxes_from_tgt_boxes[itgt_box]; - - box_id_t neighbor_source_boxes_start = - neighbor_source_boxes_starts[itgt_box]; - box_id_t neighbor_source_boxes_count = - neighbor_source_boxes_starts[itgt_box + 1] - - neighbor_source_boxes_start; - - box_id_t from_sep_close_smaller_start = - from_sep_close_smaller_starts[itgt_box]; - box_id_t from_sep_close_smaller_count = - from_sep_close_smaller_starts[itgt_box + 1] - - from_sep_close_smaller_start; - - box_id_t from_sep_close_bigger_start = - from_sep_close_bigger_starts[itarget_or_target_parent_box]; - box_id_t from_sep_close_bigger_count = - from_sep_close_bigger_starts[itarget_or_target_parent_box + 1] - - from_sep_close_bigger_start; - - %if write_counts: - if (itgt_box == 0) - new_neighbor_source_boxes_counts[0] = 0; - - new_neighbor_source_boxes_counts[itgt_box + 1] = - neighbor_source_boxes_count - + from_sep_close_smaller_count - + from_sep_close_bigger_count - ; - %else: - - box_id_t cur_idx = new_neighbor_source_boxes_starts[itgt_box]; - - #define COPY_FROM(NAME) \ - for (box_id_t i = 0; i < NAME##_count; ++i) \ - new_neighbor_source_boxes_lists[cur_idx++] = \ - NAME##_lists[NAME##_start+i]; - - COPY_FROM(neighbor_source_boxes) - COPY_FROM(from_sep_close_smaller) - COPY_FROM(from_sep_close_bigger) - - %endif - """).build( - queue.context, - type_aliases=( - ("box_id_t", self.tree.box_id_dtype), - ), - var_values=( - ("write_counts", write_counts), - ) - ) - ntarget_boxes = len(self.target_boxes) new_neighbor_source_boxes_counts = cl.array.empty( queue, ntarget_boxes+1, self.tree.box_id_dtype) @@ -1479,14 +1491,49 @@ class FMMTraversalInfo(DeviceDataRecord): new_neighbor_source_boxes_lists, range=slice(ntarget_boxes), queue=queue) + + # }}} + + # {{{ "close" list merging -> "unified list 1" + + def merge_close_lists(self, queue, debug=False): + """Return a new :class:`FMMTraversalInfo` instance with the contents of + :attr:`from_sep_close_smaller_starts` and + :attr:`from_sep_close_bigger_starts` merged into + :attr:`neighbor_source_boxes_starts` and these two attributes set to + *None*. + """ + + new_neighbor_source_boxes_starts, new_neighbor_source_boxes_lists = ( + self.merge_lists( + queue, + # starts + (self.neighbor_source_boxes_starts, + self.from_sep_close_smaller_starts, + self.from_sep_close_bigger_starts), + # lists + (self.neighbor_source_boxes_lists, + self.from_sep_close_smaller_lists, + self.from_sep_close_bigger_lists), + # input index kinds + ("target_boxes", + "target_boxes", + "target_or_target_parent_boxes"), + # output index kind + "target_boxes", + debug)) return self.copy( - neighbor_source_boxes_starts=new_neighbor_source_boxes_starts, - neighbor_source_boxes_lists=new_neighbor_source_boxes_lists, - from_sep_close_smaller_starts=None, - from_sep_close_smaller_lists=None, - from_sep_close_bigger_starts=None, - from_sep_close_bigger_lists=None) + neighbor_source_boxes_starts=new_neighbor_source_boxes_starts, + neighbor_source_boxes_lists=new_neighbor_source_boxes_lists, + from_sep_close_smaller_starts=None, + from_sep_close_smaller_lists=None, + from_sep_close_bigger_starts=None, + from_sep_close_bigger_lists=None) + + # }}} + + # {{{ merge underfilled list 2 boxes into list # }}} @@ -1683,7 +1730,10 @@ class FMMTraversalBuilder: "same_level_non_well_sep_boxes_starts"), VectorArg(box_id_dtype, "same_level_non_well_sep_boxes_lists"), - ], []), + VectorArg(particle_id_dtype, "box_source_counts_cumul"), + ScalarArg(particle_id_dtype, + "multipole_min_nsources_cumul"), + ], ["from_sep_siblings_underfilled"]), ("from_sep_smaller", FROM_SEP_SMALLER_TEMPLATE, [ ScalarArg(coord_dtype, "stick_out_factor"), @@ -1696,7 +1746,7 @@ class FMMTraversalBuilder: VectorArg(coord_dtype, "box_target_bounding_box_max"), VectorArg(particle_id_dtype, "box_source_counts_cumul"), ScalarArg(particle_id_dtype, - "from_sep_smaller_min_nsources_cumul"), + "multipole_min_nsources_cumul"), ScalarArg(box_id_dtype, "from_sep_smaller_source_level"), ], ["from_sep_close_smaller"] @@ -1742,7 +1792,7 @@ class FMMTraversalBuilder: # {{{ driver def __call__(self, queue, tree, wait_for=None, debug=False, - _from_sep_smaller_min_nsources_cumul=None): + _multipole_min_nsources_cumul=None): """ :arg queue: A :class:`pyopencl.CommandQueue` instance. :arg tree: A :class:`boxtree.Tree` instance. @@ -1754,9 +1804,9 @@ class FMMTraversalBuilder: for dependency management. """ - if _from_sep_smaller_min_nsources_cumul is None: + if _multipole_min_nsources_cumul is None: # default to old no-threshold behavior - _from_sep_smaller_min_nsources_cumul = 0 + _multipole_min_nsources_cumul = 0 if not tree._is_pruned: raise ValueError("tree must be pruned for traversal generation") @@ -1981,9 +2031,12 @@ class FMMTraversalBuilder: target_or_target_parent_boxes.data, tree.box_parent_ids.data, same_level_non_well_sep_boxes.starts.data, same_level_non_well_sep_boxes.lists.data, + tree.box_source_counts_cumul.data, + _multipole_min_nsources_cumul, wait_for=wait_for) wait_for = [evt] from_sep_siblings = result["from_sep_siblings"] + from_sep_siblings_underfilled = result["from_sep_siblings_underfilled"] # }}} @@ -2003,7 +2056,7 @@ class FMMTraversalBuilder: box_target_bounding_box_min.data, box_target_bounding_box_max.data, tree.box_source_counts_cumul.data, - _from_sep_smaller_min_nsources_cumul, + _multipole_min_nsources_cumul, ) from_sep_smaller_wait_for = [] -- GitLab From e7f593880e767d7d1c8fe39b85de9595da21cb6f Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 24 Oct 2017 14:30:30 -0500 Subject: [PATCH 2/6] WIP2 --- boxtree/traversal.py | 49 ++++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 5fc5689..44527e8 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -23,7 +23,7 @@ THE SOFTWARE. """ import numpy as np -from pytools import Record, memoize_method, memoize_in +from pytools import Record, memoize_method import pyopencl as cl import pyopencl.array # noqa import pyopencl.cltypes # noqa @@ -1111,6 +1111,11 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) # {{{ traversal info (output) +class _TargetIndexStyle: + TARGET_BOXES = 0 + TARGET_OR_TARGET_PARENT_BOXES = 1 + + class FMMTraversalInfo(DeviceDataRecord): """Interaction lists needed for a fast-multipole-like linear-time gather of particle interactions. @@ -1359,11 +1364,15 @@ class FMMTraversalInfo(DeviceDataRecord): :arg output_index_kind: Either *target_boxes* or *target_or_target_parent_boxes* """ - nlists = len(list_indexing_styles) + nlists = len(input_index_kinds) assert nlists >= 1 - assert all( - style in ("target_boxes", "target_or_target_parent_boxes") - for style in list_indexing_styles) + + if ( + output_index_style == IndexStyle.TARGET_OR_TARGET_PARENT_BOXES + and IndexStyle.TARGET_BOXES in input_index_kinds): + raise NotImplementedError( + "merging a list of 'target boxes' into a list of 'target or target parent boxes'") + from pyopencl.elementwise import ElementwiseTemplate return ElementwiseTemplate("""//CL:mako// /* input: */ @@ -1389,11 +1398,11 @@ class FMMTraversalInfo(DeviceDataRecord): %endif """, """//CL:mako// - %if output_index_kind == "target_boxes": + %if output_index_style == IndexStyle.TARGET_BOXES: const box_id_t itgt_box = i; const box_id_t itarget_or_target_parent_box = target_or_target_parent_boxes_from_tgt_boxes[itgt_box]; - %elif output_index_kind == "target_or_target_parent_boxes": + %elif output_index_style == IndexStyle.TARGET_OR_TARGET_PARENT_BOXES: const box_id_d itarget_or_target_parent_box = i; %else %endif @@ -1434,10 +1443,21 @@ class FMMTraversalInfo(DeviceDataRecord): var_values=( ("nlists", nlists), ("write_counts", write_counts), + ("input_index_styles", input_index_styles), + ("output_index_style", output_index_style), + ("IndexStyle", IndexStyle), ) ) - def merge_lists(self, queue, input_starts, input_lists, input_index_kinds, ouput_index_kind, debug=False): + def merge_lists(self, queue, input_starts, input_lists, input_index_kinds, + ouput_index_kind, debug=False): + """ + :arg input_starts: + :arg inputs_lists: + :arg input_index_kinds: + :arg output_index_kind: + """ + from boxtree.tools import reverse_index_array target_or_target_parent_boxes_from_all_boxes = reverse_index_array( self.target_or_target_parent_boxes, target_size=self.tree.nboxes, @@ -1516,11 +1536,11 @@ class FMMTraversalInfo(DeviceDataRecord): self.from_sep_close_smaller_lists, self.from_sep_close_bigger_lists), # input index kinds - ("target_boxes", - "target_boxes", - "target_or_target_parent_boxes"), + (IndexStyle.TARGET_BOXES, + IndexStyle.TARGET_BOXES, + IndexStyle.TARGET_OR_TARGET_PARENT_BOXES), # output index kind - "target_boxes", + IndexStyle.TARGET_BOXES, debug)) return self.copy( @@ -1533,7 +1553,10 @@ class FMMTraversalInfo(DeviceDataRecord): # }}} - # {{{ merge underfilled list 2 boxes into list + # {{{ merge underfilled list 2 boxes into list + + def merge_underfilled_sep_siblings_into_sep_bigger(): + pass # }}} -- GitLab From 3c9b08c86c26d6be81d4db954af5a03f4f4830a7 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 30 Oct 2017 01:02:03 -0500 Subject: [PATCH 3/6] Get list merger working again. --- boxtree/traversal.py | 115 ++++++++++++++++++++++++------------------- 1 file changed, 65 insertions(+), 50 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 44527e8..07f09b3 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1111,7 +1111,7 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) # {{{ traversal info (output) -class _TargetIndexStyle: +class IndexStyle: TARGET_BOXES = 0 TARGET_OR_TARGET_PARENT_BOXES = 1 @@ -1355,23 +1355,23 @@ class FMMTraversalInfo(DeviceDataRecord): # {{{ list merging tools @memoize_method - def get_list_merger_kernel(self, - input_index_kinds, output_index_kind, write_counts): + def get_list_merger_kernel(self, queue, input_index_styles, output_index_style, + write_counts): """ - :arg input_index_kinds: A tuple of elements each of which is - one of the strings *target_boxes* or *target_or_target_parent_boxes* - - :arg output_index_kind: Either *target_boxes* or - *target_or_target_parent_boxes* + :arg input_index_styles: A tuple of :class:`IndexStyle` + :arg output_index_style: A :class:`IndexStyle` + :arg write_counts: A :class:`bool`, indicating whether to generate a + kernel that counts the amount of space needed """ - nlists = len(input_index_kinds) + nlists = len(input_index_styles) assert nlists >= 1 if ( output_index_style == IndexStyle.TARGET_OR_TARGET_PARENT_BOXES - and IndexStyle.TARGET_BOXES in input_index_kinds): + and IndexStyle.TARGET_BOXES in input_index_styles): raise NotImplementedError( - "merging a list of 'target boxes' into a list of 'target or target parent boxes'") + "merging a list indexed by 'target boxes' " + "into a list indexed by 'target or target parent boxes'") from pyopencl.elementwise import ElementwiseTemplate return ElementwiseTemplate("""//CL:mako// @@ -1379,14 +1379,14 @@ class FMMTraversalInfo(DeviceDataRecord): box_id_t *target_or_target_parent_boxes_from_tgt_boxes, %for ilist in range(nlists): - box_id_t list${ilist}_starts; + box_id_t *list${ilist}_starts, %endfor %if not write_counts: %for ilist in range(nlists): - const box_id_t *list${ilist}_lists; + const box_id_t *list${ilist}_lists, %endfor - const box_id_t *new_starts; + const box_id_t *new_starts, %endif /* output: */ @@ -1404,15 +1404,26 @@ class FMMTraversalInfo(DeviceDataRecord): target_or_target_parent_boxes_from_tgt_boxes[itgt_box]; %elif output_index_style == IndexStyle.TARGET_OR_TARGET_PARENT_BOXES: const box_id_d itarget_or_target_parent_box = i; - %else + %else: + <% raise ValueError("unknown index style") %> %endif - %for ilist, ilist_index_kind in enumerate(input_index_kinds): + %for ilist, ilist_index_style in enumerate(input_index_styles): + %if ilist_index_style == IndexStyle.TARGET_BOXES: const box_id_t list${ilist}_start = list${ilist}_starts[itgt_box]; const box_id_t list${ilist}_count = list${ilist}_starts[itgt_box + 1] - list${ilist}_start; + %elif ilist_index_style == IndexStyle.TARGET_OR_TARGET_PARENT_BOXES: + const box_id_t list${ilist}_start = + list${ilist}_starts[itarget_or_target_parent_box]; + const box_id_t list${ilist}_count = + list${ilist}_starts[itarget_or_target_parent_box + 1] + - list${ilist}_start; + %else: + <% raise ValueError("unknown index style") %> + %endif %endfor %if write_counts: @@ -1431,7 +1442,7 @@ class FMMTraversalInfo(DeviceDataRecord): for (box_id_t i = 0; i < list${ilist}_count; ++i) { new_lists[cur_idx++] = - list${ilist}_lists[list_${ilist}_start + i]; + list${ilist}_lists[list${ilist}_start + i]; } %endfor %endif @@ -1444,18 +1455,18 @@ class FMMTraversalInfo(DeviceDataRecord): ("nlists", nlists), ("write_counts", write_counts), ("input_index_styles", input_index_styles), - ("output_index_style", output_index_style), + ("output_index_style", output_index_style), ("IndexStyle", IndexStyle), ) ) - def merge_lists(self, queue, input_starts, input_lists, input_index_kinds, - ouput_index_kind, debug=False): + def merge_lists(self, queue, input_starts, input_lists, input_index_styles, + output_index_style, debug=False): """ - :arg input_starts: - :arg inputs_lists: - :arg input_index_kinds: - :arg output_index_kind: + :arg input_starts: Starts arrays of input + :arg inputs_lists: Lists arrays of input + :arg input_index_styles: A tuple of :class:`IndexStyle` + :arg output_index_style: A :class:`IndexStyle` """ from boxtree.tools import reverse_index_array @@ -1471,17 +1482,18 @@ class FMMTraversalInfo(DeviceDataRecord): ntarget_boxes = len(self.target_boxes) new_neighbor_source_boxes_counts = cl.array.empty( queue, ntarget_boxes+1, self.tree.box_id_dtype) - get_new_nb_sources_knl(True)( - # input: - target_or_target_parent_boxes_from_tgt_boxes, - self.neighbor_source_boxes_starts, - self.from_sep_close_smaller_starts, - self.from_sep_close_bigger_starts, - - # output: - new_neighbor_source_boxes_counts, - range=slice(ntarget_boxes), - queue=queue) + self.get_list_merger_kernel( + queue, input_index_styles, output_index_style, True)( + # input: + target_or_target_parent_boxes_from_tgt_boxes, + self.neighbor_source_boxes_starts, + self.from_sep_close_smaller_starts, + self.from_sep_close_bigger_starts, + + # output: + new_neighbor_source_boxes_counts, + range=slice(ntarget_boxes), + queue=queue) new_neighbor_source_boxes_starts = cl.array.cumsum( new_neighbor_source_boxes_counts) @@ -1494,24 +1506,27 @@ class FMMTraversalInfo(DeviceDataRecord): new_neighbor_source_boxes_lists.fill(999999999) - get_new_nb_sources_knl(False)( - # input: - target_or_target_parent_boxes_from_tgt_boxes, + self.get_list_merger_kernel( + queue, input_index_styles, output_index_style, False)( + # input: + target_or_target_parent_boxes_from_tgt_boxes, + + self.neighbor_source_boxes_starts, + self.from_sep_close_smaller_starts, + self.from_sep_close_bigger_starts, + self.neighbor_source_boxes_lists, + self.from_sep_close_smaller_lists, + self.from_sep_close_bigger_lists, + + new_neighbor_source_boxes_starts, - self.neighbor_source_boxes_starts, - self.from_sep_close_smaller_starts, - self.from_sep_close_bigger_starts, - self.neighbor_source_boxes_lists, - self.from_sep_close_smaller_lists, - self.from_sep_close_bigger_lists, + # output: + new_neighbor_source_boxes_lists, + range=slice(ntarget_boxes), + queue=queue) - new_neighbor_source_boxes_starts, + return (new_neighbor_source_boxes_starts, new_neighbor_source_boxes_lists) - # output: - new_neighbor_source_boxes_lists, - range=slice(ntarget_boxes), - queue=queue) - # }}} # {{{ "close" list merging -> "unified list 1" -- GitLab From f27d176fd34e64fe04f0776a0413306ba5c2f42b Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 30 Oct 2017 01:24:43 -0500 Subject: [PATCH 4/6] Fix list merge function to be generic. --- boxtree/traversal.py | 49 +++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 07f09b3..9cf2a2a 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1361,7 +1361,7 @@ class FMMTraversalInfo(DeviceDataRecord): :arg input_index_styles: A tuple of :class:`IndexStyle` :arg output_index_style: A :class:`IndexStyle` :arg write_counts: A :class:`bool`, indicating whether to generate a - kernel that counts the amount of space needed + kernel that produces box counts or box lists """ nlists = len(input_index_styles) assert nlists >= 1 @@ -1480,52 +1480,41 @@ class FMMTraversalInfo(DeviceDataRecord): del target_or_target_parent_boxes_from_all_boxes ntarget_boxes = len(self.target_boxes) - new_neighbor_source_boxes_counts = cl.array.empty( - queue, ntarget_boxes+1, self.tree.box_id_dtype) + new_counts = cl.array.empty(queue, ntarget_boxes+1, self.tree.box_id_dtype) + self.get_list_merger_kernel( - queue, input_index_styles, output_index_style, True)( + queue, input_index_styles, output_index_style, True)(*( # input: - target_or_target_parent_boxes_from_tgt_boxes, - self.neighbor_source_boxes_starts, - self.from_sep_close_smaller_starts, - self.from_sep_close_bigger_starts, - + (target_or_target_parent_boxes_from_tgt_boxes,) + + input_starts # output: - new_neighbor_source_boxes_counts, + + (new_counts,)), range=slice(ntarget_boxes), queue=queue) - new_neighbor_source_boxes_starts = cl.array.cumsum( - new_neighbor_source_boxes_counts) - del new_neighbor_source_boxes_counts + new_starts = cl.array.cumsum(new_counts) + del new_counts - new_neighbor_source_boxes_lists = cl.array.empty( + new_lists = cl.array.empty( queue, - int(new_neighbor_source_boxes_starts[ntarget_boxes].get()), + int(new_starts[ntarget_boxes].get()), self.tree.box_id_dtype) - new_neighbor_source_boxes_lists.fill(999999999) + new_lists.fill(999999999) self.get_list_merger_kernel( - queue, input_index_styles, output_index_style, False)( + queue, input_index_styles, output_index_style, False)(*( # input: - target_or_target_parent_boxes_from_tgt_boxes, - - self.neighbor_source_boxes_starts, - self.from_sep_close_smaller_starts, - self.from_sep_close_bigger_starts, - self.neighbor_source_boxes_lists, - self.from_sep_close_smaller_lists, - self.from_sep_close_bigger_lists, - - new_neighbor_source_boxes_starts, - + (target_or_target_parent_boxes_from_tgt_boxes,) + + input_starts + + input_lists + + (new_starts,) # output: - new_neighbor_source_boxes_lists, + + (new_lists,)), range=slice(ntarget_boxes), queue=queue) - return (new_neighbor_source_boxes_starts, new_neighbor_source_boxes_lists) + return (new_starts, new_lists) # }}} -- GitLab From d548d917993c95b0f1d6479c303e04589c20467c Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 10 Nov 2017 01:51:48 -0600 Subject: [PATCH 5/6] Add a 'source containing children' finder --- boxtree/traversal.py | 48 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 9cf2a2a..0dbec29 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1109,7 +1109,53 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) # }}} -# {{{ traversal info (output) +# {{{ replace a csr-style list with its source containing children + +# Given a CSR list of boxes, returns a new CSR list, with each box being +# replaced with a list of its children that have sources. +SOURCE_CONTAINING_CHILDREN_FINDER_TEMPLATE = r"""//CL// + +void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t i) +{ + box_id_t box_start = box_starts[i]; + box_id_t box_stop = box_starts[i+1]; + + for (box_id_t j = box_start; j < box_stop; ++j) + { + box_id_t box_id = box_lists[j]; + + if (box_flags[box_id] & BOX_HAS_OWN_SOURCES) + { + APPEND_source_containing_children(box_id); + } + + ${walk_init("box_id")} + + while (continue_walk) + { + ${walk_get_box_id()} + + if (walk_box_id) + { + if (box_flags[walk_box_id] & BOX_HAS_OWN_SOURCES) + { + APPEND_source_containing_children(walk_box_id); + } + else + { + ${walk_push("walk_box_id")} + continue; + } + } + + ${walk_advance()} + } + } +} +""" + +# }}} + class IndexStyle: TARGET_BOXES = 0 -- GitLab From 21b1914c5d1d624aaa24a4b76b0dccdbf813f09b Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 10 Nov 2017 01:52:21 -0600 Subject: [PATCH 6/6] Refactor list merging into its own class, and get things working. --- boxtree/traversal.py | 477 ++++++++++++++++++++++++++----------------- 1 file changed, 293 insertions(+), 184 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 0dbec29..03723bd 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1157,11 +1157,202 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t i) # }}} -class IndexStyle: +# {{{ list merging tools + +LIST_MERGER_TEMPLATE = ElementwiseTemplate( + arguments=r"""//CL:mako// + /* input: */ + box_id_t *target_or_target_parent_boxes_from_tgt_boxes, + + %for ilist in range(nlists): + box_id_t *list${ilist}_starts, + %endfor + + %if not write_counts: + %for ilist in range(nlists): + const box_id_t *list${ilist}_lists, + %endfor + const box_id_t *new_starts, + %endif + + /* output: */ + + %if not write_counts: + box_id_t *new_lists, + %else: + box_id_t *new_counts, + %endif + """, + + operation=r"""//CL:mako// + /* Compute current index. */ + %if output_index_style == _IndexStyle.TARGET_BOXES: + const box_id_t itgt_box = i; + const box_id_t itarget_or_target_parent_box = + target_or_target_parent_boxes_from_tgt_boxes[itgt_box]; + %elif output_index_style == _IndexStyle.TARGET_OR_TARGET_PARENT_BOXES: + const box_id_t itarget_or_target_parent_box = i; + %else: + <% raise ValueError("unknown index style") %> + %endif + + /* Count the size of the input at the current index. */ + %for ilist, ilist_index_style in enumerate(input_index_styles): + %if ilist_index_style == _IndexStyle.TARGET_BOXES: + const box_id_t list${ilist}_start = + list${ilist}_starts[itgt_box]; + const box_id_t list${ilist}_count = + list${ilist}_starts[itgt_box + 1] + - list${ilist}_start; + %elif ilist_index_style == _IndexStyle.TARGET_OR_TARGET_PARENT_BOXES: + const box_id_t list${ilist}_start = + list${ilist}_starts[itarget_or_target_parent_box]; + const box_id_t list${ilist}_count = + list${ilist}_starts[itarget_or_target_parent_box + 1] + - list${ilist}_start; + %else: + <% raise ValueError("unknown index style") %> + %endif + %endfor + + /* Update the counts or copy the elements. */ + %if write_counts: + if (i == 0) + new_counts[0] = 0; + + new_counts[i + 1] = + %for ilist in range(nlists): + + list${ilist}_count + %endfor + ; + %else: + box_id_t cur_idx = new_starts[i]; + + %for ilist in range(nlists): + for (box_id_t j = 0; j < list${ilist}_count; ++j) + { + new_lists[cur_idx++] = + list${ilist}_lists[list${ilist}_start + j]; + } + %endfor + %endif + """, + + name="merge_lists") + + +class _IndexStyle: TARGET_BOXES = 0 TARGET_OR_TARGET_PARENT_BOXES = 1 +class _ListMerger(object): + def __init__(self, context, box_id_dtype): + self.context = context + self.box_id_dtype = box_id_dtype + + @memoize_method + def get_list_merger_kernel(self, input_index_styles, output_index_style, + write_counts): + """ + :arg input_index_styles: A tuple of :class:`_IndexStyle` + :arg output_index_style: A :class:`_IndexStyle` + :arg write_counts: A :class:`bool`, indicating whether to generate a + kernel that produces box counts or box lists + """ + nlists = len(input_index_styles) + assert nlists >= 1 + + if ( + output_index_style == _IndexStyle.TARGET_OR_TARGET_PARENT_BOXES + and _IndexStyle.TARGET_BOXES in input_index_styles): + raise NotImplementedError( + "merging a list indexed by target boxes " + "into a list indexed by target or target parent boxes") + + return LIST_MERGER_TEMPLATE.build( + self.context, + type_aliases=( + ("box_id_t", self.box_id_dtype), + ), + var_values=( + ("nlists", nlists), + ("write_counts", write_counts), + ("input_index_styles", input_index_styles), + ("output_index_style", output_index_style), + ("_IndexStyle", _IndexStyle), + )) + + def __call__(self, queue, input_starts, input_lists, input_index_styles, + output_index_style, target_boxes, target_or_target_parent_boxes, + nboxes, debug=False, wait_for=[]): + """ + :arg input_starts: Starts arrays of input + :arg inputs_lists: Lists arrays of input + :arg input_index_styles: A tuple of :class:`_IndexStyle` + :arg output_index_style: A :class:`_IndexStyle` + """ + + cl.wait_for_events(wait_for) + + from boxtree.tools import reverse_index_array + target_or_target_parent_boxes_from_all_boxes = reverse_index_array( + target_or_target_parent_boxes, target_size=nboxes, + queue=queue) + target_or_target_parent_boxes_from_tgt_boxes = cl.array.take( + target_or_target_parent_boxes_from_all_boxes, + target_boxes, queue=queue) + + del target_or_target_parent_boxes_from_all_boxes + + ntarget_boxes = len(target_boxes) + ntarget_or_ntarget_parent_boxes = len(target_or_target_parent_boxes) + + output_size = (ntarget_boxes + if output_index_style == _IndexStyle.TARGET_BOXES + else ntarget_or_ntarget_parent_boxes) + + new_counts = cl.array.empty(queue, output_size+1, self.box_id_dtype) + + self.get_list_merger_kernel( + input_index_styles, output_index_style, True)(*( + # input: + (target_or_target_parent_boxes_from_tgt_boxes,) + + input_starts + # output: + + (new_counts,)), + range=slice(output_size), + queue=queue) + + new_starts = cl.array.cumsum(new_counts) + del new_counts + + new_lists = cl.array.empty( + queue, + int(new_starts[-1].get()), + self.box_id_dtype) + + new_lists.fill(999999999) + + self.get_list_merger_kernel( + input_index_styles, output_index_style, False)(*( + # input: + (target_or_target_parent_boxes_from_tgt_boxes,) + + input_starts + + input_lists + + (new_starts,) + # output: + + (new_lists,)), + range=slice(output_size), + queue=queue) + + return (new_starts, new_lists) + +# }}} + + +# {{{ traversal info (output) + class FMMTraversalInfo(DeviceDataRecord): """Interaction lists needed for a fast-multipole-like linear-time gather of particle interactions. @@ -1398,172 +1589,6 @@ class FMMTraversalInfo(DeviceDataRecord): """ - # {{{ list merging tools - - @memoize_method - def get_list_merger_kernel(self, queue, input_index_styles, output_index_style, - write_counts): - """ - :arg input_index_styles: A tuple of :class:`IndexStyle` - :arg output_index_style: A :class:`IndexStyle` - :arg write_counts: A :class:`bool`, indicating whether to generate a - kernel that produces box counts or box lists - """ - nlists = len(input_index_styles) - assert nlists >= 1 - - if ( - output_index_style == IndexStyle.TARGET_OR_TARGET_PARENT_BOXES - and IndexStyle.TARGET_BOXES in input_index_styles): - raise NotImplementedError( - "merging a list indexed by 'target boxes' " - "into a list indexed by 'target or target parent boxes'") - - from pyopencl.elementwise import ElementwiseTemplate - return ElementwiseTemplate("""//CL:mako// - /* input: */ - box_id_t *target_or_target_parent_boxes_from_tgt_boxes, - - %for ilist in range(nlists): - box_id_t *list${ilist}_starts, - %endfor - - %if not write_counts: - %for ilist in range(nlists): - const box_id_t *list${ilist}_lists, - %endfor - const box_id_t *new_starts, - %endif - - /* output: */ - - %if not write_counts: - box_id_t *new_lists, - %else: - box_id_t *new_counts, - %endif - """, - """//CL:mako// - %if output_index_style == IndexStyle.TARGET_BOXES: - const box_id_t itgt_box = i; - const box_id_t itarget_or_target_parent_box = - target_or_target_parent_boxes_from_tgt_boxes[itgt_box]; - %elif output_index_style == IndexStyle.TARGET_OR_TARGET_PARENT_BOXES: - const box_id_d itarget_or_target_parent_box = i; - %else: - <% raise ValueError("unknown index style") %> - %endif - - %for ilist, ilist_index_style in enumerate(input_index_styles): - %if ilist_index_style == IndexStyle.TARGET_BOXES: - const box_id_t list${ilist}_start = - list${ilist}_starts[itgt_box]; - const box_id_t list${ilist}_count = - list${ilist}_starts[itgt_box + 1] - - list${ilist}_start; - %elif ilist_index_style == IndexStyle.TARGET_OR_TARGET_PARENT_BOXES: - const box_id_t list${ilist}_start = - list${ilist}_starts[itarget_or_target_parent_box]; - const box_id_t list${ilist}_count = - list${ilist}_starts[itarget_or_target_parent_box + 1] - - list${ilist}_start; - %else: - <% raise ValueError("unknown index style") %> - %endif - %endfor - - %if write_counts: - if (itgt_box == 0) - new_counts[0] = 0; - - new_counts[itgt_box + 1] = - %for ilist in range(nlists): - + list${ilist}_count - %endfor - ; - %else: - box_id_t cur_idx = new_starts[itgt_box]; - - %for ilist in range(nlists): - for (box_id_t i = 0; i < list${ilist}_count; ++i) - { - new_lists[cur_idx++] = - list${ilist}_lists[list${ilist}_start + i]; - } - %endfor - %endif - """).build( - queue.context, - type_aliases=( - ("box_id_t", self.tree.box_id_dtype), - ), - var_values=( - ("nlists", nlists), - ("write_counts", write_counts), - ("input_index_styles", input_index_styles), - ("output_index_style", output_index_style), - ("IndexStyle", IndexStyle), - ) - ) - - def merge_lists(self, queue, input_starts, input_lists, input_index_styles, - output_index_style, debug=False): - """ - :arg input_starts: Starts arrays of input - :arg inputs_lists: Lists arrays of input - :arg input_index_styles: A tuple of :class:`IndexStyle` - :arg output_index_style: A :class:`IndexStyle` - """ - - from boxtree.tools import reverse_index_array - target_or_target_parent_boxes_from_all_boxes = reverse_index_array( - self.target_or_target_parent_boxes, target_size=self.tree.nboxes, - queue=queue) - target_or_target_parent_boxes_from_tgt_boxes = cl.array.take( - target_or_target_parent_boxes_from_all_boxes, - self.target_boxes, queue=queue) - - del target_or_target_parent_boxes_from_all_boxes - - ntarget_boxes = len(self.target_boxes) - new_counts = cl.array.empty(queue, ntarget_boxes+1, self.tree.box_id_dtype) - - self.get_list_merger_kernel( - queue, input_index_styles, output_index_style, True)(*( - # input: - (target_or_target_parent_boxes_from_tgt_boxes,) - + input_starts - # output: - + (new_counts,)), - range=slice(ntarget_boxes), - queue=queue) - - new_starts = cl.array.cumsum(new_counts) - del new_counts - - new_lists = cl.array.empty( - queue, - int(new_starts[ntarget_boxes].get()), - self.tree.box_id_dtype) - - new_lists.fill(999999999) - - self.get_list_merger_kernel( - queue, input_index_styles, output_index_style, False)(*( - # input: - (target_or_target_parent_boxes_from_tgt_boxes,) - + input_starts - + input_lists - + (new_starts,) - # output: - + (new_lists,)), - range=slice(ntarget_boxes), - queue=queue) - - return (new_starts, new_lists) - - # }}} - # {{{ "close" list merging -> "unified list 1" def merge_close_lists(self, queue, debug=False): @@ -1574,8 +1599,10 @@ class FMMTraversalInfo(DeviceDataRecord): *None*. """ + list_merger = _ListMerger(queue.context, self.tree.box_id_dtype) + new_neighbor_source_boxes_starts, new_neighbor_source_boxes_lists = ( - self.merge_lists( + list_merger( queue, # starts (self.neighbor_source_boxes_starts, @@ -1585,12 +1612,16 @@ class FMMTraversalInfo(DeviceDataRecord): (self.neighbor_source_boxes_lists, self.from_sep_close_smaller_lists, self.from_sep_close_bigger_lists), - # input index kinds - (IndexStyle.TARGET_BOXES, - IndexStyle.TARGET_BOXES, - IndexStyle.TARGET_OR_TARGET_PARENT_BOXES), - # output index kind - IndexStyle.TARGET_BOXES, + # input index styles + (_IndexStyle.TARGET_BOXES, + _IndexStyle.TARGET_BOXES, + _IndexStyle.TARGET_OR_TARGET_PARENT_BOXES), + # output index style + _IndexStyle.TARGET_BOXES, + # box and tree data + self.target_boxes, + self.target_or_target_parent_boxes, + self.tree.nboxes, debug)) return self.copy( @@ -1603,13 +1634,6 @@ class FMMTraversalInfo(DeviceDataRecord): # }}} - # {{{ merge underfilled list 2 boxes into list - - def merge_underfilled_sep_siblings_into_sep_bigger(): - pass - - # }}} - # {{{ debugging aids def get_box_list(self, what, index): @@ -1856,6 +1880,35 @@ class FMMTraversalBuilder: # }}} + # {{{ list merger + + result["list_merger"] = _ListMerger(self.context, box_id_dtype) + + # }}} + + # {{{ source containing children + + src = Template( + TRAVERSAL_PREAMBLE_TEMPLATE + + SOURCE_CONTAINING_CHILDREN_FINDER_TEMPLATE, + strict_undefined=True).render(**render_vars) + + result["source_containing_children_finder"] = ( + ListOfListsBuilder(self.context, + [("source_containing_children", box_id_dtype)], + str(src), + arg_decls=[ + ScalarArg(box_id_dtype, "aligned_nboxes"), + VectorArg(box_id_dtype, "box_child_ids"), + VectorArg(box_flags_enum.dtype, "box_flags"), + VectorArg(box_id_dtype, "box_starts"), + VectorArg(box_id_dtype, "box_lists"), + ], + debug=debug, + name_prefix="source_containing_children")) + + # }}} + logger.info("traversal build kernels: done") return _KernelInfo(**result) @@ -2180,7 +2233,8 @@ class FMMTraversalBuilder: wait_for=wait_for) wait_for = [evt] - from_sep_bigger = result["from_sep_bigger"] + from_sep_bigger_starts = result["from_sep_bigger"].starts + from_sep_bigger_lists = result["from_sep_bigger"].lists if with_extent: from_sep_close_bigger_starts = result["from_sep_close_bigger"].starts @@ -2189,6 +2243,61 @@ class FMMTraversalBuilder: from_sep_close_bigger_starts = None from_sep_close_bigger_lists = None + # {{{ combine from_sep_siblings_underfilled into from_sep_bigger + + if len(from_sep_siblings_underfilled.lists) > 0: + fin_debug("combining underfilled list 2 boxes into list 4") + + # {{{ find source containing children + + result, evt = knl_info.source_containing_children_finder( + queue, len(target_or_target_parent_boxes), + tree.aligned_nboxes, tree.box_child_ids.data, + tree.box_flags.data, + from_sep_siblings_underfilled.starts.data, + from_sep_siblings_underfilled.lists.data, + wait_for=wait_for) + + wait_for = [evt] + + from_sep_siblings_underfilled_children_starts = ( + result["source_containing_children"].starts) + from_sep_siblings_underfilled_children_lists = ( + result["source_containing_children"].lists) + + # }}} + + # {{{ merge lists + + from_sep_bigger_starts, from_sep_bigger_lists = knl_info.list_merger( + queue, + # starts + (from_sep_siblings_underfilled_children_starts, + from_sep_bigger_starts), + # lists + (from_sep_siblings_underfilled_children_lists, + from_sep_bigger_lists), + # input index styles + (_IndexStyle.TARGET_OR_TARGET_PARENT_BOXES, + _IndexStyle.TARGET_OR_TARGET_PARENT_BOXES), + # output index style + _IndexStyle.TARGET_OR_TARGET_PARENT_BOXES, + # box and tree data + target_boxes, + target_or_target_parent_boxes, + tree.nboxes, + debug, + wait_for=wait_for) + + del from_sep_siblings_underfilled_children_starts + del from_sep_siblings_underfilled_children_lists + + # }}} + + del from_sep_siblings_underfilled + + # }}} + # }}} if self.well_sep_is_n_away == 1: @@ -2243,8 +2352,8 @@ class FMMTraversalBuilder: from_sep_close_smaller_starts=from_sep_close_smaller_starts, from_sep_close_smaller_lists=from_sep_close_smaller_lists, - from_sep_bigger_starts=from_sep_bigger.starts, - from_sep_bigger_lists=from_sep_bigger.lists, + from_sep_bigger_starts=from_sep_bigger_starts, + from_sep_bigger_lists=from_sep_bigger_lists, from_sep_close_bigger_starts=from_sep_close_bigger_starts, from_sep_close_bigger_lists=from_sep_close_bigger_lists, -- GitLab