diff --git a/boxtree/traversal.py b/boxtree/traversal.py index a35c60d08cfcc9850b49f50fd7fd4b561c1019d4..03723bd00eace55cd462c58fb0cd5833238ae6e5 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -23,7 +23,7 @@ THE SOFTWARE. """ import numpy as np -from pytools import Record, memoize_method, memoize_in +from pytools import Record, memoize_method import pyopencl as cl import pyopencl.array # noqa import pyopencl.cltypes # noqa @@ -618,7 +618,15 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) if (sep) { - APPEND_from_sep_siblings(sib_box_id); + if (box_source_counts_cumul[sib_box_id] + >= multipole_min_nsources_cumul) + { + APPEND_from_sep_siblings(sib_box_id); + } + else + { + APPEND_from_sep_siblings_underfilled(sib_box_id); + } } } } @@ -812,7 +820,7 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t target_box_number) if (meets_sep_crit && box_source_counts_cumul[walk_box_id] - >= from_sep_smaller_min_nsources_cumul) + >= multipole_min_nsources_cumul) { if (from_sep_smaller_source_level == walk_level) APPEND_from_sep_smaller(walk_box_id); @@ -1101,6 +1109,248 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) # }}} +# {{{ replace a csr-style list with its source containing children + +# Given a CSR list of boxes, returns a new CSR list, with each box being +# replaced with a list of its children that have sources. +SOURCE_CONTAINING_CHILDREN_FINDER_TEMPLATE = r"""//CL// + +void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t i) +{ + box_id_t box_start = box_starts[i]; + box_id_t box_stop = box_starts[i+1]; + + for (box_id_t j = box_start; j < box_stop; ++j) + { + box_id_t box_id = box_lists[j]; + + if (box_flags[box_id] & BOX_HAS_OWN_SOURCES) + { + APPEND_source_containing_children(box_id); + } + + ${walk_init("box_id")} + + while (continue_walk) + { + ${walk_get_box_id()} + + if (walk_box_id) + { + if (box_flags[walk_box_id] & BOX_HAS_OWN_SOURCES) + { + APPEND_source_containing_children(walk_box_id); + } + else + { + ${walk_push("walk_box_id")} + continue; + } + } + + ${walk_advance()} + } + } +} +""" + +# }}} + + +# {{{ list merging tools + +LIST_MERGER_TEMPLATE = ElementwiseTemplate( + arguments=r"""//CL:mako// + /* input: */ + box_id_t *target_or_target_parent_boxes_from_tgt_boxes, + + %for ilist in range(nlists): + box_id_t *list${ilist}_starts, + %endfor + + %if not write_counts: + %for ilist in range(nlists): + const box_id_t *list${ilist}_lists, + %endfor + const box_id_t *new_starts, + %endif + + /* output: */ + + %if not write_counts: + box_id_t *new_lists, + %else: + box_id_t *new_counts, + %endif + """, + + operation=r"""//CL:mako// + /* Compute current index. */ + %if output_index_style == _IndexStyle.TARGET_BOXES: + const box_id_t itgt_box = i; + const box_id_t itarget_or_target_parent_box = + target_or_target_parent_boxes_from_tgt_boxes[itgt_box]; + %elif output_index_style == _IndexStyle.TARGET_OR_TARGET_PARENT_BOXES: + const box_id_t itarget_or_target_parent_box = i; + %else: + <% raise ValueError("unknown index style") %> + %endif + + /* Count the size of the input at the current index. */ + %for ilist, ilist_index_style in enumerate(input_index_styles): + %if ilist_index_style == _IndexStyle.TARGET_BOXES: + const box_id_t list${ilist}_start = + list${ilist}_starts[itgt_box]; + const box_id_t list${ilist}_count = + list${ilist}_starts[itgt_box + 1] + - list${ilist}_start; + %elif ilist_index_style == _IndexStyle.TARGET_OR_TARGET_PARENT_BOXES: + const box_id_t list${ilist}_start = + list${ilist}_starts[itarget_or_target_parent_box]; + const box_id_t list${ilist}_count = + list${ilist}_starts[itarget_or_target_parent_box + 1] + - list${ilist}_start; + %else: + <% raise ValueError("unknown index style") %> + %endif + %endfor + + /* Update the counts or copy the elements. */ + %if write_counts: + if (i == 0) + new_counts[0] = 0; + + new_counts[i + 1] = + %for ilist in range(nlists): + + list${ilist}_count + %endfor + ; + %else: + box_id_t cur_idx = new_starts[i]; + + %for ilist in range(nlists): + for (box_id_t j = 0; j < list${ilist}_count; ++j) + { + new_lists[cur_idx++] = + list${ilist}_lists[list${ilist}_start + j]; + } + %endfor + %endif + """, + + name="merge_lists") + + +class _IndexStyle: + TARGET_BOXES = 0 + TARGET_OR_TARGET_PARENT_BOXES = 1 + + +class _ListMerger(object): + def __init__(self, context, box_id_dtype): + self.context = context + self.box_id_dtype = box_id_dtype + + @memoize_method + def get_list_merger_kernel(self, input_index_styles, output_index_style, + write_counts): + """ + :arg input_index_styles: A tuple of :class:`_IndexStyle` + :arg output_index_style: A :class:`_IndexStyle` + :arg write_counts: A :class:`bool`, indicating whether to generate a + kernel that produces box counts or box lists + """ + nlists = len(input_index_styles) + assert nlists >= 1 + + if ( + output_index_style == _IndexStyle.TARGET_OR_TARGET_PARENT_BOXES + and _IndexStyle.TARGET_BOXES in input_index_styles): + raise NotImplementedError( + "merging a list indexed by target boxes " + "into a list indexed by target or target parent boxes") + + return LIST_MERGER_TEMPLATE.build( + self.context, + type_aliases=( + ("box_id_t", self.box_id_dtype), + ), + var_values=( + ("nlists", nlists), + ("write_counts", write_counts), + ("input_index_styles", input_index_styles), + ("output_index_style", output_index_style), + ("_IndexStyle", _IndexStyle), + )) + + def __call__(self, queue, input_starts, input_lists, input_index_styles, + output_index_style, target_boxes, target_or_target_parent_boxes, + nboxes, debug=False, wait_for=[]): + """ + :arg input_starts: Starts arrays of input + :arg inputs_lists: Lists arrays of input + :arg input_index_styles: A tuple of :class:`_IndexStyle` + :arg output_index_style: A :class:`_IndexStyle` + """ + + cl.wait_for_events(wait_for) + + from boxtree.tools import reverse_index_array + target_or_target_parent_boxes_from_all_boxes = reverse_index_array( + target_or_target_parent_boxes, target_size=nboxes, + queue=queue) + target_or_target_parent_boxes_from_tgt_boxes = cl.array.take( + target_or_target_parent_boxes_from_all_boxes, + target_boxes, queue=queue) + + del target_or_target_parent_boxes_from_all_boxes + + ntarget_boxes = len(target_boxes) + ntarget_or_ntarget_parent_boxes = len(target_or_target_parent_boxes) + + output_size = (ntarget_boxes + if output_index_style == _IndexStyle.TARGET_BOXES + else ntarget_or_ntarget_parent_boxes) + + new_counts = cl.array.empty(queue, output_size+1, self.box_id_dtype) + + self.get_list_merger_kernel( + input_index_styles, output_index_style, True)(*( + # input: + (target_or_target_parent_boxes_from_tgt_boxes,) + + input_starts + # output: + + (new_counts,)), + range=slice(output_size), + queue=queue) + + new_starts = cl.array.cumsum(new_counts) + del new_counts + + new_lists = cl.array.empty( + queue, + int(new_starts[-1].get()), + self.box_id_dtype) + + new_lists.fill(999999999) + + self.get_list_merger_kernel( + input_index_styles, output_index_style, False)(*( + # input: + (target_or_target_parent_boxes_from_tgt_boxes,) + + input_starts + + input_lists + + (new_starts,) + # output: + + (new_lists,)), + range=slice(output_size), + queue=queue) + + return (new_starts, new_lists) + +# }}} + + # {{{ traversal info (output) class FMMTraversalInfo(DeviceDataRecord): @@ -1309,6 +1559,10 @@ class FMMTraversalInfo(DeviceDataRecord): (Note: This list contains global box numbers, not indices into :attr:`source_boxes`.) + (Optionally, this can also contain source boxes smaller than the target box + which are separated by the target box's size. These boxes' ancestors did not + meet the source count threshold for inclusion in List 2.) + If :attr:`boxtree.Tree.sources_have_extent` or :attr:`boxtree.Tree.targets_have_extent`, then :attr:`from_sep_close_bigger_starts` will be non-*None*. It records @@ -1332,6 +1586,7 @@ class FMMTraversalInfo(DeviceDataRecord): .. attribute:: from_sep_close_bigger_lists ``box_id_t [*]`` (or *None*) + """ # {{{ "close" list merging -> "unified list 1" @@ -1344,149 +1599,38 @@ class FMMTraversalInfo(DeviceDataRecord): *None*. """ - from boxtree.tools import reverse_index_array - target_or_target_parent_boxes_from_all_boxes = reverse_index_array( - self.target_or_target_parent_boxes, target_size=self.tree.nboxes, - queue=queue) - target_or_target_parent_boxes_from_tgt_boxes = cl.array.take( - target_or_target_parent_boxes_from_all_boxes, - self.target_boxes, queue=queue) - - del target_or_target_parent_boxes_from_all_boxes - - @memoize_in(self, "merge_close_lists_kernel") - def get_new_nb_sources_knl(write_counts): - from pyopencl.elementwise import ElementwiseTemplate - return ElementwiseTemplate("""//CL:mako// - /* input: */ - box_id_t *target_or_target_parent_boxes_from_tgt_boxes, - box_id_t *neighbor_source_boxes_starts, - box_id_t *from_sep_close_smaller_starts, - box_id_t *from_sep_close_bigger_starts, - - %if not write_counts: - box_id_t *neighbor_source_boxes_lists, - box_id_t *from_sep_close_smaller_lists, - box_id_t *from_sep_close_bigger_lists, - - box_id_t *new_neighbor_source_boxes_starts, - %endif - - /* output: */ - - %if write_counts: - box_id_t *new_neighbor_source_boxes_counts, - %else: - box_id_t *new_neighbor_source_boxes_lists, - %endif - """, - """//CL:mako// - box_id_t itgt_box = i; - box_id_t itarget_or_target_parent_box = - target_or_target_parent_boxes_from_tgt_boxes[itgt_box]; - - box_id_t neighbor_source_boxes_start = - neighbor_source_boxes_starts[itgt_box]; - box_id_t neighbor_source_boxes_count = - neighbor_source_boxes_starts[itgt_box + 1] - - neighbor_source_boxes_start; - - box_id_t from_sep_close_smaller_start = - from_sep_close_smaller_starts[itgt_box]; - box_id_t from_sep_close_smaller_count = - from_sep_close_smaller_starts[itgt_box + 1] - - from_sep_close_smaller_start; - - box_id_t from_sep_close_bigger_start = - from_sep_close_bigger_starts[itarget_or_target_parent_box]; - box_id_t from_sep_close_bigger_count = - from_sep_close_bigger_starts[itarget_or_target_parent_box + 1] - - from_sep_close_bigger_start; - - %if write_counts: - if (itgt_box == 0) - new_neighbor_source_boxes_counts[0] = 0; - - new_neighbor_source_boxes_counts[itgt_box + 1] = - neighbor_source_boxes_count - + from_sep_close_smaller_count - + from_sep_close_bigger_count - ; - %else: - - box_id_t cur_idx = new_neighbor_source_boxes_starts[itgt_box]; - - #define COPY_FROM(NAME) \ - for (box_id_t i = 0; i < NAME##_count; ++i) \ - new_neighbor_source_boxes_lists[cur_idx++] = \ - NAME##_lists[NAME##_start+i]; - - COPY_FROM(neighbor_source_boxes) - COPY_FROM(from_sep_close_smaller) - COPY_FROM(from_sep_close_bigger) - - %endif - """).build( - queue.context, - type_aliases=( - ("box_id_t", self.tree.box_id_dtype), - ), - var_values=( - ("write_counts", write_counts), - ) - ) - - ntarget_boxes = len(self.target_boxes) - new_neighbor_source_boxes_counts = cl.array.empty( - queue, ntarget_boxes+1, self.tree.box_id_dtype) - get_new_nb_sources_knl(True)( - # input: - target_or_target_parent_boxes_from_tgt_boxes, - self.neighbor_source_boxes_starts, - self.from_sep_close_smaller_starts, - self.from_sep_close_bigger_starts, - - # output: - new_neighbor_source_boxes_counts, - range=slice(ntarget_boxes), - queue=queue) - - new_neighbor_source_boxes_starts = cl.array.cumsum( - new_neighbor_source_boxes_counts) - del new_neighbor_source_boxes_counts - - new_neighbor_source_boxes_lists = cl.array.empty( - queue, - int(new_neighbor_source_boxes_starts[ntarget_boxes].get()), - self.tree.box_id_dtype) - - new_neighbor_source_boxes_lists.fill(999999999) - - get_new_nb_sources_knl(False)( - # input: - target_or_target_parent_boxes_from_tgt_boxes, - - self.neighbor_source_boxes_starts, - self.from_sep_close_smaller_starts, - self.from_sep_close_bigger_starts, - self.neighbor_source_boxes_lists, - self.from_sep_close_smaller_lists, - self.from_sep_close_bigger_lists, - - new_neighbor_source_boxes_starts, - - # output: - new_neighbor_source_boxes_lists, - range=slice(ntarget_boxes), - queue=queue) + list_merger = _ListMerger(queue.context, self.tree.box_id_dtype) + + new_neighbor_source_boxes_starts, new_neighbor_source_boxes_lists = ( + list_merger( + queue, + # starts + (self.neighbor_source_boxes_starts, + self.from_sep_close_smaller_starts, + self.from_sep_close_bigger_starts), + # lists + (self.neighbor_source_boxes_lists, + self.from_sep_close_smaller_lists, + self.from_sep_close_bigger_lists), + # input index styles + (_IndexStyle.TARGET_BOXES, + _IndexStyle.TARGET_BOXES, + _IndexStyle.TARGET_OR_TARGET_PARENT_BOXES), + # output index style + _IndexStyle.TARGET_BOXES, + # box and tree data + self.target_boxes, + self.target_or_target_parent_boxes, + self.tree.nboxes, + debug)) return self.copy( - neighbor_source_boxes_starts=new_neighbor_source_boxes_starts, - neighbor_source_boxes_lists=new_neighbor_source_boxes_lists, - from_sep_close_smaller_starts=None, - from_sep_close_smaller_lists=None, - from_sep_close_bigger_starts=None, - from_sep_close_bigger_lists=None) + neighbor_source_boxes_starts=new_neighbor_source_boxes_starts, + neighbor_source_boxes_lists=new_neighbor_source_boxes_lists, + from_sep_close_smaller_starts=None, + from_sep_close_smaller_lists=None, + from_sep_close_bigger_starts=None, + from_sep_close_bigger_lists=None) # }}} @@ -1683,7 +1827,10 @@ class FMMTraversalBuilder: "same_level_non_well_sep_boxes_starts"), VectorArg(box_id_dtype, "same_level_non_well_sep_boxes_lists"), - ], []), + VectorArg(particle_id_dtype, "box_source_counts_cumul"), + ScalarArg(particle_id_dtype, + "multipole_min_nsources_cumul"), + ], ["from_sep_siblings_underfilled"]), ("from_sep_smaller", FROM_SEP_SMALLER_TEMPLATE, [ ScalarArg(coord_dtype, "stick_out_factor"), @@ -1696,7 +1843,7 @@ class FMMTraversalBuilder: VectorArg(coord_dtype, "box_target_bounding_box_max"), VectorArg(particle_id_dtype, "box_source_counts_cumul"), ScalarArg(particle_id_dtype, - "from_sep_smaller_min_nsources_cumul"), + "multipole_min_nsources_cumul"), ScalarArg(box_id_dtype, "from_sep_smaller_source_level"), ], ["from_sep_close_smaller"] @@ -1733,6 +1880,35 @@ class FMMTraversalBuilder: # }}} + # {{{ list merger + + result["list_merger"] = _ListMerger(self.context, box_id_dtype) + + # }}} + + # {{{ source containing children + + src = Template( + TRAVERSAL_PREAMBLE_TEMPLATE + + SOURCE_CONTAINING_CHILDREN_FINDER_TEMPLATE, + strict_undefined=True).render(**render_vars) + + result["source_containing_children_finder"] = ( + ListOfListsBuilder(self.context, + [("source_containing_children", box_id_dtype)], + str(src), + arg_decls=[ + ScalarArg(box_id_dtype, "aligned_nboxes"), + VectorArg(box_id_dtype, "box_child_ids"), + VectorArg(box_flags_enum.dtype, "box_flags"), + VectorArg(box_id_dtype, "box_starts"), + VectorArg(box_id_dtype, "box_lists"), + ], + debug=debug, + name_prefix="source_containing_children")) + + # }}} + logger.info("traversal build kernels: done") return _KernelInfo(**result) @@ -1742,7 +1918,7 @@ class FMMTraversalBuilder: # {{{ driver def __call__(self, queue, tree, wait_for=None, debug=False, - _from_sep_smaller_min_nsources_cumul=None): + _multipole_min_nsources_cumul=None): """ :arg queue: A :class:`pyopencl.CommandQueue` instance. :arg tree: A :class:`boxtree.Tree` instance. @@ -1754,9 +1930,9 @@ class FMMTraversalBuilder: for dependency management. """ - if _from_sep_smaller_min_nsources_cumul is None: + if _multipole_min_nsources_cumul is None: # default to old no-threshold behavior - _from_sep_smaller_min_nsources_cumul = 0 + _multipole_min_nsources_cumul = 0 if not tree._is_pruned: raise ValueError("tree must be pruned for traversal generation") @@ -1981,9 +2157,12 @@ class FMMTraversalBuilder: target_or_target_parent_boxes.data, tree.box_parent_ids.data, same_level_non_well_sep_boxes.starts.data, same_level_non_well_sep_boxes.lists.data, + tree.box_source_counts_cumul.data, + _multipole_min_nsources_cumul, wait_for=wait_for) wait_for = [evt] from_sep_siblings = result["from_sep_siblings"] + from_sep_siblings_underfilled = result["from_sep_siblings_underfilled"] # }}} @@ -2003,7 +2182,7 @@ class FMMTraversalBuilder: box_target_bounding_box_min.data, box_target_bounding_box_max.data, tree.box_source_counts_cumul.data, - _from_sep_smaller_min_nsources_cumul, + _multipole_min_nsources_cumul, ) from_sep_smaller_wait_for = [] @@ -2054,7 +2233,8 @@ class FMMTraversalBuilder: wait_for=wait_for) wait_for = [evt] - from_sep_bigger = result["from_sep_bigger"] + from_sep_bigger_starts = result["from_sep_bigger"].starts + from_sep_bigger_lists = result["from_sep_bigger"].lists if with_extent: from_sep_close_bigger_starts = result["from_sep_close_bigger"].starts @@ -2063,6 +2243,61 @@ class FMMTraversalBuilder: from_sep_close_bigger_starts = None from_sep_close_bigger_lists = None + # {{{ combine from_sep_siblings_underfilled into from_sep_bigger + + if len(from_sep_siblings_underfilled.lists) > 0: + fin_debug("combining underfilled list 2 boxes into list 4") + + # {{{ find source containing children + + result, evt = knl_info.source_containing_children_finder( + queue, len(target_or_target_parent_boxes), + tree.aligned_nboxes, tree.box_child_ids.data, + tree.box_flags.data, + from_sep_siblings_underfilled.starts.data, + from_sep_siblings_underfilled.lists.data, + wait_for=wait_for) + + wait_for = [evt] + + from_sep_siblings_underfilled_children_starts = ( + result["source_containing_children"].starts) + from_sep_siblings_underfilled_children_lists = ( + result["source_containing_children"].lists) + + # }}} + + # {{{ merge lists + + from_sep_bigger_starts, from_sep_bigger_lists = knl_info.list_merger( + queue, + # starts + (from_sep_siblings_underfilled_children_starts, + from_sep_bigger_starts), + # lists + (from_sep_siblings_underfilled_children_lists, + from_sep_bigger_lists), + # input index styles + (_IndexStyle.TARGET_OR_TARGET_PARENT_BOXES, + _IndexStyle.TARGET_OR_TARGET_PARENT_BOXES), + # output index style + _IndexStyle.TARGET_OR_TARGET_PARENT_BOXES, + # box and tree data + target_boxes, + target_or_target_parent_boxes, + tree.nboxes, + debug, + wait_for=wait_for) + + del from_sep_siblings_underfilled_children_starts + del from_sep_siblings_underfilled_children_lists + + # }}} + + del from_sep_siblings_underfilled + + # }}} + # }}} if self.well_sep_is_n_away == 1: @@ -2117,8 +2352,8 @@ class FMMTraversalBuilder: from_sep_close_smaller_starts=from_sep_close_smaller_starts, from_sep_close_smaller_lists=from_sep_close_smaller_lists, - from_sep_bigger_starts=from_sep_bigger.starts, - from_sep_bigger_lists=from_sep_bigger.lists, + from_sep_bigger_starts=from_sep_bigger_starts, + from_sep_bigger_lists=from_sep_bigger_lists, from_sep_close_bigger_starts=from_sep_close_bigger_starts, from_sep_close_bigger_lists=from_sep_close_bigger_lists,