diff --git a/boxtree/distributed/calculation.py b/boxtree/distributed/calculation.py index 107cb76f5a24be28828f472a36a33b61c2e35827..e565fe5bb0fcc3a0f34a0fe29d65d989e6165576 100644 --- a/boxtree/distributed/calculation.py +++ b/boxtree/distributed/calculation.py @@ -477,7 +477,7 @@ def calculate_pot(local_wrangler, global_wrangler, local_trav, source_weights, "('list 4 close')") potentials = potentials + local_wrangler.eval_direct( - local_trav.target_or_target_parent_boxes, + local_trav.target_boxes, local_trav.from_sep_close_bigger_starts, local_trav.from_sep_close_bigger_lists, local_src_weights)[0] diff --git a/boxtree/distributed/partition.py b/boxtree/distributed/partition.py index ecaa77c1d3551ac4d2e277feda511072c7f91997..453b73f494c9574a8c2d5bbea9ff7d8b0453aa77 100644 --- a/boxtree/distributed/partition.py +++ b/boxtree/distributed/partition.py @@ -264,7 +264,7 @@ class ResponsibleBoxesQuery(object): # Add list 4 close of responsible boxes if self.traversal.from_sep_close_bigger_starts is not None: self.add_interaction_list_boxes( - self.target_or_target_parent_boxes_dev, + self.target_boxes_dev, responsible_boxes_mask | ancestor_boxes_mask, self.from_sep_close_bigger_starts_dev, self.from_sep_close_bigger_lists_dev, diff --git a/boxtree/distributed/perf_model.py b/boxtree/distributed/perf_model.py index 32103030fac2c104078783ac0e85189839f2c3d3..049d54188420dd99ccd6f793f79464d04ca71d80 100644 --- a/boxtree/distributed/perf_model.py +++ b/boxtree/distributed/perf_model.py @@ -229,7 +229,7 @@ class PerformanceCounter: ) if traversal.from_sep_close_bigger_starts is not None: - ndirect_src_boxes[traversal.target_or_target_parent_boxes] += ( + ndirect_src_boxes[traversal.target_boxes] += ( traversal.from_sep_close_bigger_starts[1:] - traversal.from_sep_close_bigger_starts[:-1] ) @@ -326,7 +326,7 @@ class PerformanceCounter: else: np2l = np.zeros(len(trav.target_or_target_parent_boxes), dtype=np.intp) - for itgt_box, tgt_ibox in enumerate(trav.target_or_target_parent_boxes): + for itgt_box, tgt_ibox in enumerate(trav.target_boxes): tgt_box_level = trav.tree.box_levels[tgt_ibox] ncoeffs = parameters.ncoeffs_fmm_by_level[tgt_box_level] @@ -476,9 +476,9 @@ class PerformanceModel: result = self.time_result[0] if wall_time: - dependent_value = result[y_name].wall_elapsed + dependent_value = result[y_name]["wall_elapsed"] else: - dependent_value = result[y_name].process_elapsed + dependent_value = result[y_name]["process_elapsed"] independent_value = result[x_name[0]] coeff = dependent_value / independent_value @@ -490,9 +490,9 @@ class PerformanceModel: for iresult, result in enumerate(self.time_result): if wall_time: - dependent_value[iresult] = result[y_name].wall_elapsed + dependent_value[iresult] = result[y_name]["wall_elapsed"] else: - dependent_value[iresult] = result[y_name].process_elapsed + dependent_value[iresult] = result[y_name]["process_elapsed"] for icol, variable_name in enumerate(x_name): coeff_matrix[iresult, icol] = result[variable_name] @@ -688,8 +688,8 @@ class PerformanceModel: elif isinstance(entry, dict): converted_result[field_name] = TimingResult( - entry['wall_elapsed'], - entry['process_elapsed'] + wall_elapsed=entry['wall_elapsed'], + process_elapsed=entry['process_elapsed'] ) else: @@ -718,8 +718,8 @@ class PerformanceModel: elif isinstance(entry, TimingResult): current_output[field_name] = { - 'wall_elapsed': entry.wall_elapsed, - 'process_elapsed': entry.process_elapsed + 'wall_elapsed': entry.get("wall_elapsed"), + 'process_elapsed': 
entry.get("process_elapsed") } output.append(current_output) diff --git a/boxtree/fmm.py b/boxtree/fmm.py index 6e9a6a316651e44e311d23fb93e22e10a90ac662..3002888f5ea0ebeaffe5cfcd5e28f99aec3cc003 100644 --- a/boxtree/fmm.py +++ b/boxtree/fmm.py @@ -25,7 +25,16 @@ THE SOFTWARE. import logging logger = logging.getLogger(__name__) -from pytools import ProcessLogger, Record + +try: + # Python 3 + from collections.abc import Mapping +except ImportError: + # Python 2 + from collections import Mapping + + +from pytools import ProcessLogger def drive_fmm(traversal, expansion_wrangler, src_weights, timing_data=None): @@ -161,7 +170,7 @@ def drive_fmm(traversal, expansion_wrangler, src_weights, timing_data=None): if traversal.from_sep_close_bigger_starts is not None: direct_result, timing_future = wrangler.eval_direct( - traversal.target_or_target_parent_boxes, + traversal.target_boxes, traversal.from_sep_close_bigger_starts, traversal.from_sep_close_bigger_lists, src_weights) @@ -357,22 +366,44 @@ class ExpansionWranglerInterface: # {{{ timing result -class TimingResult(Record): - """ - .. attribute:: wall_elapsed - .. attribute:: process_elapsed +class TimingResult(Mapping): + """Interface for returned timing data. + + This supports accessing timing results via a mapping interface, along with + combining results via :meth:`merge`. + + .. automethod:: merge """ - def __init__(self, wall_elapsed, process_elapsed): - Record.__init__(self, - wall_elapsed=wall_elapsed, - process_elapsed=process_elapsed) + def __init__(self, *args, **kwargs): + """See constructor for :class:`dict`.""" + self._mapping = dict(*args, **kwargs) + + def __getitem__(self, key): + return self._mapping[key] - def __add__(self, other): - return TimingResult( - self.wall_elapsed + other.wall_elapsed, - self.process_elapsed + other.process_elapsed - ) + def __iter__(self): + return iter(self._mapping) + + def __len__(self): + return len(self._mapping) + + def merge(self, other): + """Merge this result with another by adding together common fields.""" + result = {} + + for key in self: + val = self.get(key) + other_val = other.get(key) + + if val is None or other_val is None: + continue + + result[key] = val + other_val + + return type(self)(result) + + __add__ = merge # }}} @@ -408,17 +439,6 @@ class TimingRecorder(object): def add(self, description, future): self.futures[description].append(future) - def merge(self, result1, result2): - wall_elapsed = None - process_elapsed = None - - if None not in (result1.wall_elapsed, result2.wall_elapsed): - wall_elapsed = result1.wall_elapsed + result2.wall_elapsed - if None not in (result1.process_elapsed, result2.process_elapsed): - process_elapsed = result1.process_elapsed + result2.process_elapsed - - return TimingResult(wall_elapsed, process_elapsed) - def summarize(self): result = {} @@ -427,7 +447,7 @@ class TimingRecorder(object): timing_result = next(futures).result() for future in futures: - timing_result = self.merge(timing_result, future.result()) + timing_result = timing_result.merge(future.result()) result[description] = timing_result diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index c076e56e2f47470bbb70c3acc1fcf0c7da8d7142..33e9dec7d087cecb0610a35e9ef08a6daebc631d 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -41,6 +41,10 @@ __doc__ = """Integrates :mod:`boxtree` with class FMMLibExpansionWrangler(object): """Implements the :class:`boxtree.fmm.ExpansionWranglerInterface` by using pyfmmlib. 
+ + Timing results returned by this wrangler contains the values *wall_elapsed* + and (optionally, if supported) *process_elapsed*, which measure wall time + and process time in seconds, respectively. """ # {{{ constructor diff --git a/boxtree/tools.py b/boxtree/tools.py index f390d0904c3129dc8e4ff451ea020110b10e0712..54a60d31d3a71651b549d856cf3cc775abb9b32a 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -517,14 +517,18 @@ class DummyTimingFuture(TimingFuture): @classmethod def from_timer(cls, timer): - return cls(timer.wall_elapsed, timer.process_elapsed) + return cls(wall_elapsed=timer.wall_elapsed, + process_elapsed=timer.process_elapsed) - def __init__(self, wall_elapsed, process_elapsed): - self.wall_elapsed = wall_elapsed - self.process_elapsed = process_elapsed + @classmethod + def from_op_count(cls, op_count): + return cls(ops_elapsed=op_count) + + def __init__(self, *args, **kwargs): + self._result = TimingResult(*args, **kwargs) def result(self): - return TimingResult(self.wall_elapsed, self.process_elapsed) + return self._result def done(self): return True @@ -787,4 +791,198 @@ class AllReduceCommPattern(object): # }}} + +# {{{ constant one wrangler + +class ConstantOneExpansionWrangler(object): + """This implements the 'analytical routines' for a Green's function that is + constant 1 everywhere. For 'charges' of 'ones', this should get every particle + a copy of the particle count. + + Timing results returned by this wrangler contain the field *ops_elapsed*, + which counts approximately the number of floating-point operations required. + """ + + def __init__(self, tree): + self.tree = tree + + def multipole_expansion_zeros(self): + return np.zeros(self.tree.nboxes, dtype=np.float64) + + local_expansion_zeros = multipole_expansion_zeros + + def output_zeros(self): + return np.zeros(self.tree.ntargets, dtype=np.float64) + + def _get_source_slice(self, ibox): + pstart = self.tree.box_source_starts[ibox] + return slice( + pstart, pstart + self.tree.box_source_counts_nonchild[ibox]) + + def _get_target_slice(self, ibox): + pstart = self.tree.box_target_starts[ibox] + return slice( + pstart, pstart + self.tree.box_target_counts_nonchild[ibox]) + + def reorder_sources(self, source_array): + return source_array[self.tree.user_source_ids] + + def reorder_potentials(self, potentials): + return potentials[self.tree.sorted_target_ids] + + @staticmethod + def timing_future(ops): + return DummyTimingFuture.from_op_count(ops) + + def form_multipoles(self, level_start_source_box_nrs, source_boxes, src_weights): + mpoles = self.multipole_expansion_zeros() + ops = 0 + + for ibox in source_boxes: + pslice = self._get_source_slice(ibox) + mpoles[ibox] += np.sum(src_weights[pslice]) + ops += src_weights[pslice].size + + return mpoles, self.timing_future(ops) + + def coarsen_multipoles(self, level_start_source_parent_box_nrs, + source_parent_boxes, mpoles): + tree = self.tree + ops = 0 + + # nlevels-1 is the last valid level index + # nlevels-2 is the last valid level that could have children + # + # 3 is the last relevant source_level. + # 2 is the last relevant target_level. 
+ # (because no level 1 box will be well-separated from another) + for source_level in range(tree.nlevels-1, 2, -1): + target_level = source_level - 1 + start, stop = level_start_source_parent_box_nrs[ + target_level:target_level+2] + for ibox in source_parent_boxes[start:stop]: + for child in tree.box_child_ids[:, ibox]: + if child: + mpoles[ibox] += mpoles[child] + ops += 1 + + return mpoles, self.timing_future(ops) + + def eval_direct(self, target_boxes, neighbor_sources_starts, + neighbor_sources_lists, src_weights): + pot = self.output_zeros() + ops = 0 + + for itgt_box, tgt_ibox in enumerate(target_boxes): + tgt_pslice = self._get_target_slice(tgt_ibox) + + src_sum = 0 + nsrcs = 0 + start, end = neighbor_sources_starts[itgt_box:itgt_box+2] + #print "DIR: %s <- %s" % (tgt_ibox, neighbor_sources_lists[start:end]) + for src_ibox in neighbor_sources_lists[start:end]: + src_pslice = self._get_source_slice(src_ibox) + nsrcs += src_weights[src_pslice].size + + src_sum += np.sum(src_weights[src_pslice]) + + pot[tgt_pslice] = src_sum + ops += pot[tgt_pslice].size * nsrcs + + return pot, self.timing_future(ops) + + def multipole_to_local(self, + level_start_target_or_target_parent_box_nrs, + target_or_target_parent_boxes, + starts, lists, mpole_exps): + local_exps = self.local_expansion_zeros() + ops = 0 + + for itgt_box, tgt_ibox in enumerate(target_or_target_parent_boxes): + start, end = starts[itgt_box:itgt_box+2] + + contrib = 0 + #print tgt_ibox, "<-", lists[start:end] + for src_ibox in lists[start:end]: + contrib += mpole_exps[src_ibox] + ops += 1 + + local_exps[tgt_ibox] += contrib + + return local_exps, self.timing_future(ops) + + def eval_multipoles(self, + target_boxes_by_source_level, from_sep_smaller_nonsiblings_by_level, + mpole_exps): + pot = self.output_zeros() + ops = 0 + + for level, ssn in enumerate(from_sep_smaller_nonsiblings_by_level): + for itgt_box, tgt_ibox in \ + enumerate(target_boxes_by_source_level[level]): + tgt_pslice = self._get_target_slice(tgt_ibox) + + contrib = 0 + + start, end = ssn.starts[itgt_box:itgt_box+2] + for src_ibox in ssn.lists[start:end]: + contrib += mpole_exps[src_ibox] + + pot[tgt_pslice] += contrib + ops += pot[tgt_pslice].size * (end - start) + + return pot, self.timing_future(ops) + + def form_locals(self, + level_start_target_or_target_parent_box_nrs, + target_or_target_parent_boxes, starts, lists, src_weights): + local_exps = self.local_expansion_zeros() + ops = 0 + + for itgt_box, tgt_ibox in enumerate(target_or_target_parent_boxes): + start, end = starts[itgt_box:itgt_box+2] + + #print "LIST 4", tgt_ibox, "<-", lists[start:end] + contrib = 0 + nsrcs = 0 + for src_ibox in lists[start:end]: + src_pslice = self._get_source_slice(src_ibox) + nsrcs += src_weights[src_pslice].size + + contrib += np.sum(src_weights[src_pslice]) + + local_exps[tgt_ibox] += contrib + ops += nsrcs + + return local_exps, self.timing_future(ops) + + def refine_locals(self, level_start_target_or_target_parent_box_nrs, + target_or_target_parent_boxes, local_exps): + ops = 0 + + for target_lev in range(self.tree.nlevels): + start, stop = level_start_target_or_target_parent_box_nrs[ + target_lev:target_lev+2] + for ibox in target_or_target_parent_boxes[start:stop]: + local_exps[ibox] += local_exps[self.tree.box_parent_ids[ibox]] + ops += 1 + + return local_exps, self.timing_future(ops) + + def eval_locals(self, level_start_target_box_nrs, target_boxes, local_exps): + pot = self.output_zeros() + ops = 0 + + for ibox in target_boxes: + tgt_pslice = 
self._get_target_slice(ibox) + pot[tgt_pslice] += local_exps[ibox] + ops += pot[tgt_pslice].size + + return pot, self.timing_future(ops) + + def finalize_potentials(self, potentials): + return potentials + +# }}} + # vim: foldmethod=marker:filetype=pyopencl diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 9ecd7a93ec1978eac5b0cefcc1f620a5cfe02465..7c49ce8cfd8f008089911ece2c303473757dcb5d 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -23,7 +23,7 @@ THE SOFTWARE. """ import numpy as np -from pytools import Record, memoize_method, memoize_in +from pytools import Record, memoize_method import pyopencl as cl import pyopencl.array # noqa import pyopencl.cltypes # noqa @@ -1165,6 +1165,187 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) # }}} +# {{{ list merger + +LIST_MERGER_TEMPLATE = ElementwiseTemplate( + arguments=r"""//CL:mako// + /* input: */ + + box_id_t *output_to_input_box, + + %for ilist in range(nlists): + box_id_t *list${ilist}_starts, + %endfor + + %if not write_counts: + %for ilist in range(nlists): + const box_id_t *list${ilist}_lists, + %endfor + const box_id_t *new_starts, + %endif + + /* output: */ + + %if not write_counts: + box_id_t *new_lists, + %else: + box_id_t *new_counts, + %endif + """, + + operation=r"""//CL:mako// + /* Compute output and input indices. */ + const box_id_t ioutput_box = i; + const box_id_t ibox = output_to_input_box[ioutput_box]; + + /* Count the size of the input at the current index. */ + %for ilist in range(nlists): + const box_id_t list${ilist}_start = list${ilist}_starts[ibox]; + const box_id_t list${ilist}_count = + list${ilist}_starts[ibox + 1] - list${ilist}_start; + %endfor + + /* Update the counts or copy the elements. */ + %if write_counts: + if (ioutput_box == 0) + new_counts[0] = 0; + + new_counts[ioutput_box + 1] = + %for ilist in range(nlists): + + list${ilist}_count + %endfor + ; + %else: + box_id_t cur_idx = new_starts[ioutput_box]; + + %for ilist in range(nlists): + for (box_id_t j = 0; j < list${ilist}_count; ++j) + { + new_lists[cur_idx++] = + list${ilist}_lists[list${ilist}_start + j]; + } + %endfor + %endif + """, + + name="merge_lists") + + +class _IndexStyle: + TARGET_BOXES = 0 + TARGET_OR_TARGET_PARENT_BOXES = 1 + + +class _ListMerger(object): + """Utility class for combining box lists optionally changing indexing style.""" + + def __init__(self, context, box_id_dtype): + self.context = context + self.box_id_dtype = box_id_dtype + + @memoize_method + def get_list_merger_kernel(self, nlists, write_counts): + """ + :arg nlists: Number of input lists + :arg write_counts: A :class:`bool`, indicating whether to generate a + kernel that produces box counts or box lists + """ + assert nlists >= 1 + + return LIST_MERGER_TEMPLATE.build( + self.context, + type_aliases=( + ("box_id_t", self.box_id_dtype), + ), + var_values=( + ("nlists", nlists), + ("write_counts", write_counts), + )) + + def __call__(self, queue, input_starts, input_lists, input_index_style, + output_index_style, target_boxes, target_or_target_parent_boxes, + nboxes, debug=False, wait_for=[]): + """ + :arg input_starts: Starts arrays of input + :arg input_lists: Lists arrays of input + :arg input_index_style: A :class:`_IndexStyle` + :arg output_index_style: A :class:`_IndexStyle` + :returns: A pair *results_dict, event*, where *results_dict* + contains entries *starts* and *lists* + """ + + if ( + output_index_style == _IndexStyle.TARGET_OR_TARGET_PARENT_BOXES + and input_index_style == 
_IndexStyle.TARGET_BOXES): + raise ValueError( + "unsupported: merging a list indexed by target boxes " + "into a list indexed by target or target parent boxes") + + ntarget_boxes = len(target_boxes) + ntarget_or_ntarget_parent_boxes = len(target_or_target_parent_boxes) + + noutput_boxes = (ntarget_boxes + if output_index_style == _IndexStyle.TARGET_BOXES + else ntarget_or_ntarget_parent_boxes) + + if ( + input_index_style == _IndexStyle.TARGET_OR_TARGET_PARENT_BOXES + and output_index_style == _IndexStyle.TARGET_BOXES): + from boxtree.tools import reverse_index_array + target_or_target_parent_boxes_from_all_boxes = reverse_index_array( + target_or_target_parent_boxes, target_size=nboxes, + queue=queue) + target_or_target_parent_boxes_from_target_boxes = cl.array.take( + target_or_target_parent_boxes_from_all_boxes, + target_boxes, queue=queue) + + output_to_input_box = target_or_target_parent_boxes_from_target_boxes + else: + output_to_input_box = cl.array.arange( + queue, noutput_boxes, dtype=self.box_id_dtype) + + new_counts = cl.array.empty(queue, noutput_boxes+1, self.box_id_dtype) + + assert len(input_starts) == len(input_lists) + nlists = len(input_starts) + + evt = self.get_list_merger_kernel(nlists, True)(*( + # input: + (output_to_input_box,) + + input_starts + # output: + + (new_counts,)), + range=slice(noutput_boxes), + queue=queue, + wait_for=wait_for) + + new_starts = cl.array.cumsum(new_counts) + del new_counts + + new_lists = cl.array.empty( + queue, + int(new_starts[-1].get()), + self.box_id_dtype) + + new_lists.fill(999999999) + + evt = self.get_list_merger_kernel(nlists, False)(*( + # input: + (output_to_input_box,) + + input_starts + + input_lists + + (new_starts,) + # output: + + (new_lists,)), + range=slice(noutput_boxes), + queue=queue, + wait_for=[evt]) + + return dict(starts=new_starts, lists=new_lists), evt + +# }}} + + # {{{ traversal info (output) class FMMTraversalInfo(DeviceDataRecord): @@ -1361,7 +1542,7 @@ class FMMTraversalInfo(DeviceDataRecord): which boxes are used with the interaction list entries of :attr:`from_sep_smaller_by_level`. ``target_boxes_sep_smaller_by_source_level[i]`` has length - ``from_sep_smaller_by_level[i].num_nonempty_lists`. + ``from_sep_smaller_by_level[i].num_nonempty_lists``. .. attribute:: from_sep_smaller_by_level @@ -1405,7 +1586,10 @@ class FMMTraversalInfo(DeviceDataRecord): interactions between boxes that would ordinarily be handled through "List 4", but must be evaluated specially/directly because of :ref:`extent`. - Indexed like :attr:`target_or_target_parent_boxes`. See :ref:`csr`. + *from_sep_bigger_starts* is indexed like + :attr:`target_or_target_parent_boxes`. Similar to the other "close" lists, + *from_sep_close_bigger_starts* is indexed like :attr:`target_boxes`. See + :ref:`csr`. .. attribute:: from_sep_bigger_starts @@ -1417,11 +1601,17 @@ class FMMTraversalInfo(DeviceDataRecord): .. attribute:: from_sep_close_bigger_starts - ``box_id_t [ntarget_or_target_parent_boxes+1]`` (or *None*) + ``box_id_t [ntarget_boxes+1]`` (or *None*) .. attribute:: from_sep_close_bigger_lists ``box_id_t [*]`` (or *None*) + + .. versionchanged:: 2018.2 + + Changed index style of *from_sep_close_bigger_starts* from + :attr:`target_or_target_parent_boxes` to :attr:`target_boxes`. + """ # {{{ "close" list merging -> "unified list 1" @@ -1434,149 +1624,38 @@ class FMMTraversalInfo(DeviceDataRecord): *None*. 
""" - from boxtree.tools import reverse_index_array - target_or_target_parent_boxes_from_all_boxes = reverse_index_array( - self.target_or_target_parent_boxes, target_size=self.tree.nboxes, - queue=queue) - target_or_target_parent_boxes_from_tgt_boxes = cl.array.take( - target_or_target_parent_boxes_from_all_boxes, - self.target_boxes, queue=queue) - - del target_or_target_parent_boxes_from_all_boxes - - @memoize_in(self, "merge_close_lists_kernel") - def get_new_nb_sources_knl(write_counts): - from pyopencl.elementwise import ElementwiseTemplate - return ElementwiseTemplate("""//CL:mako// - /* input: */ - box_id_t *target_or_target_parent_boxes_from_tgt_boxes, - box_id_t *neighbor_source_boxes_starts, - box_id_t *from_sep_close_smaller_starts, - box_id_t *from_sep_close_bigger_starts, - - %if not write_counts: - box_id_t *neighbor_source_boxes_lists, - box_id_t *from_sep_close_smaller_lists, - box_id_t *from_sep_close_bigger_lists, - - box_id_t *new_neighbor_source_boxes_starts, - %endif - - /* output: */ - - %if write_counts: - box_id_t *new_neighbor_source_boxes_counts, - %else: - box_id_t *new_neighbor_source_boxes_lists, - %endif - """, - """//CL:mako// - box_id_t itgt_box = i; - box_id_t itarget_or_target_parent_box = - target_or_target_parent_boxes_from_tgt_boxes[itgt_box]; - - box_id_t neighbor_source_boxes_start = - neighbor_source_boxes_starts[itgt_box]; - box_id_t neighbor_source_boxes_count = - neighbor_source_boxes_starts[itgt_box + 1] - - neighbor_source_boxes_start; - - box_id_t from_sep_close_smaller_start = - from_sep_close_smaller_starts[itgt_box]; - box_id_t from_sep_close_smaller_count = - from_sep_close_smaller_starts[itgt_box + 1] - - from_sep_close_smaller_start; - - box_id_t from_sep_close_bigger_start = - from_sep_close_bigger_starts[itarget_or_target_parent_box]; - box_id_t from_sep_close_bigger_count = - from_sep_close_bigger_starts[itarget_or_target_parent_box + 1] - - from_sep_close_bigger_start; - - %if write_counts: - if (itgt_box == 0) - new_neighbor_source_boxes_counts[0] = 0; - - new_neighbor_source_boxes_counts[itgt_box + 1] = - neighbor_source_boxes_count - + from_sep_close_smaller_count - + from_sep_close_bigger_count - ; - %else: - - box_id_t cur_idx = new_neighbor_source_boxes_starts[itgt_box]; - - #define COPY_FROM(NAME) \ - for (box_id_t i = 0; i < NAME##_count; ++i) \ - new_neighbor_source_boxes_lists[cur_idx++] = \ - NAME##_lists[NAME##_start+i]; - - COPY_FROM(neighbor_source_boxes) - COPY_FROM(from_sep_close_smaller) - COPY_FROM(from_sep_close_bigger) - - %endif - """).build( - queue.context, - type_aliases=( - ("box_id_t", self.tree.box_id_dtype), - ), - var_values=( - ("write_counts", write_counts), - ) - ) - - ntarget_boxes = len(self.target_boxes) - new_neighbor_source_boxes_counts = cl.array.empty( - queue, ntarget_boxes+1, self.tree.box_id_dtype) - get_new_nb_sources_knl(True)( - # input: - target_or_target_parent_boxes_from_tgt_boxes, - self.neighbor_source_boxes_starts, - self.from_sep_close_smaller_starts, - self.from_sep_close_bigger_starts, - - # output: - new_neighbor_source_boxes_counts, - range=slice(ntarget_boxes), - queue=queue) - - new_neighbor_source_boxes_starts = cl.array.cumsum( - new_neighbor_source_boxes_counts) - del new_neighbor_source_boxes_counts - - new_neighbor_source_boxes_lists = cl.array.empty( - queue, - int(new_neighbor_source_boxes_starts[ntarget_boxes].get()), - self.tree.box_id_dtype) - - new_neighbor_source_boxes_lists.fill(999999999) - - get_new_nb_sources_knl(False)( - # input: - 
target_or_target_parent_boxes_from_tgt_boxes, - - self.neighbor_source_boxes_starts, - self.from_sep_close_smaller_starts, - self.from_sep_close_bigger_starts, - self.neighbor_source_boxes_lists, - self.from_sep_close_smaller_lists, - self.from_sep_close_bigger_lists, - - new_neighbor_source_boxes_starts, - - # output: - new_neighbor_source_boxes_lists, - range=slice(ntarget_boxes), - queue=queue) + list_merger = _ListMerger(queue.context, self.tree.box_id_dtype) + + result, evt = ( + list_merger( + queue, + # starts + (self.neighbor_source_boxes_starts, + self.from_sep_close_smaller_starts, + self.from_sep_close_bigger_starts), + # lists + (self.neighbor_source_boxes_lists, + self.from_sep_close_smaller_lists, + self.from_sep_close_bigger_lists), + # input index styles + _IndexStyle.TARGET_BOXES, + # output index style + _IndexStyle.TARGET_BOXES, + # box and tree data + self.target_boxes, + self.target_or_target_parent_boxes, + self.tree.nboxes, + debug)) + + cl.wait_for_events([evt]) return self.copy( - neighbor_source_boxes_starts=new_neighbor_source_boxes_starts, - neighbor_source_boxes_lists=new_neighbor_source_boxes_lists, - from_sep_close_smaller_starts=None, - from_sep_close_smaller_lists=None, - from_sep_close_bigger_starts=None, - from_sep_close_bigger_lists=None) + neighbor_source_boxes_starts=result["starts"], + neighbor_source_boxes_lists=result["lists"], + from_sep_close_smaller_starts=None, + from_sep_close_smaller_lists=None, + from_sep_close_bigger_starts=None, + from_sep_close_bigger_lists=None) # }}} @@ -2164,8 +2243,36 @@ class FMMTraversalBuilder: from_sep_bigger = result["from_sep_bigger"] if with_extent: - from_sep_close_bigger_starts = result["from_sep_close_bigger"].starts - from_sep_close_bigger_lists = result["from_sep_close_bigger"].lists + # These are indexed by target_or_target_parent boxes; we rewrite + # them to be indexed by target_boxes. 
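+            # (The re-indexing is done by passing the single list through
+            # _ListMerger with TARGET_OR_TARGET_PARENT_BOXES as the input
+            # index style and TARGET_BOXES as the output index style: the
+            # merger's output_to_input_box map then gathers, for each target
+            # box, the entries stored under its target-or-target-parent index.)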
+ from_sep_close_bigger_starts_raw = result["from_sep_close_bigger"].starts + from_sep_close_bigger_lists_raw = result["from_sep_close_bigger"].lists + + list_merger = _ListMerger(queue.context, tree.box_id_dtype) + result, evt = list_merger( + queue, + # starts + (from_sep_close_bigger_starts_raw,), + # lists + (from_sep_close_bigger_lists_raw,), + # input index style + _IndexStyle.TARGET_OR_TARGET_PARENT_BOXES, + # output index style + _IndexStyle.TARGET_BOXES, + # box and tree data + target_boxes, + target_or_target_parent_boxes, + tree.nboxes, + debug, + wait_for=wait_for) + + wait_for = [evt] + + del from_sep_close_bigger_starts_raw + del from_sep_close_bigger_lists_raw + + from_sep_close_bigger_starts = result["starts"] + from_sep_close_bigger_lists = result["lists"] else: from_sep_close_bigger_starts = None from_sep_close_bigger_lists = None diff --git a/boxtree/version.py b/boxtree/version.py index aac009869c50439ea6faf456d7ab641f1f8e915d..f5c7ef59fd0b28718e7d3d4b38e0d3e91e2e1949 100644 --- a/boxtree/version.py +++ b/boxtree/version.py @@ -1,2 +1,2 @@ -VERSION = (2018, 1) +VERSION = (2018, 2) VERSION_TEXT = ".".join(str(i) for i in VERSION) diff --git a/boxtree/visualization.py b/boxtree/visualization.py index c487a965ca138e028cf3a62aca9c42b26556727a..e5fb19de8fbfe51e6003a6f346bcfca076f890db 100644 --- a/boxtree/visualization.py +++ b/boxtree/visualization.py @@ -268,7 +268,7 @@ def draw_box_lists(tree_plotter, traversal, ibox): _draw_box_list(tree_plotter, ibox, traversal.from_sep_close_bigger_starts, traversal.from_sep_close_bigger_lists, - key_to_box=traversal.target_or_target_parent_boxes, + key_to_box=traversal.target_boxes, facecolor="purple", hatch=".") # }}} diff --git a/doc/misc.rst b/doc/misc.rst index 1086d8b4b284827a6adf0ff5728fa3626af16c7b..a4c008fd9a67cfeb62438bcd00e9f7fa825395e8 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -24,13 +24,18 @@ for instructions. User-visible Changes ==================== -Version 2018.1 +Version 2018.2 -------------- .. note:: This version is currently under development. You can get snapshots from boxtree's `git repository `_ +* Changed index style of the *from_sep_close_bigger_starts* interaction list. + +Version 2018.1 +-------------- + * Added *timing_data* parameter to FMM driver. Version 2013.1 diff --git a/test/test_fmm.py b/test/test_fmm.py index cb0b061fc46a46aeefb47dc48460e9f13fb6c49b..80417212e79b0d9aa12e8fd6d1caed8cba954b47 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -36,7 +36,8 @@ from boxtree.tools import ( # noqa: F401 make_normal_particle_array as p_normal, make_surface_particle_array as p_surface, make_uniform_particle_array as p_uniform, - particle_array_to_host) + particle_array_to_host, + ConstantOneExpansionWrangler) import logging logger = logging.getLogger(__name__) @@ -44,186 +45,13 @@ logger = logging.getLogger(__name__) # {{{ fmm interaction completeness test -from boxtree.tools import return_timing_data - - -class ConstantOneExpansionWrangler(object): - """This implements the 'analytical routines' for a Green's function that is - constant 1 everywhere. For 'charges' of 'ones', this should get every particle - a copy of the particle count. 
- """ - - def __init__(self, tree): - self.tree = tree - - def multipole_expansion_zeros(self): - return np.zeros(self.tree.nboxes, dtype=np.float64) - - local_expansion_zeros = multipole_expansion_zeros - - def potential_zeros(self): - return np.zeros(self.tree.ntargets, dtype=np.float64) - - def _get_source_slice(self, ibox): - pstart = self.tree.box_source_starts[ibox] - return slice( - pstart, pstart + self.tree.box_source_counts_nonchild[ibox]) - - def _get_target_slice(self, ibox): - pstart = self.tree.box_target_starts[ibox] - return slice( - pstart, pstart + self.tree.box_target_counts_nonchild[ibox]) - - def reorder_sources(self, source_array): - return source_array[self.tree.user_source_ids] - - def reorder_potentials(self, potentials): - return potentials[self.tree.sorted_target_ids] - - @return_timing_data - def form_multipoles(self, level_start_source_box_nrs, source_boxes, src_weights): - mpoles = self.multipole_expansion_zeros() - for ibox in source_boxes: - pslice = self._get_source_slice(ibox) - mpoles[ibox] += np.sum(src_weights[pslice]) - - return mpoles - - @return_timing_data - def coarsen_multipoles(self, level_start_source_parent_box_nrs, - source_parent_boxes, mpoles): - tree = self.tree - - # nlevels-1 is the last valid level index - # nlevels-2 is the last valid level that could have children - # - # 3 is the last relevant source_level. - # 2 is the last relevant target_level. - # (because no level 1 box will be well-separated from another) - for source_level in range(tree.nlevels-1, 2, -1): - target_level = source_level - 1 - start, stop = level_start_source_parent_box_nrs[ - target_level:target_level+2] - for ibox in source_parent_boxes[start:stop]: - for child in tree.box_child_ids[:, ibox]: - if child: - mpoles[ibox] += mpoles[child] - - return mpoles - - @return_timing_data - def eval_direct(self, target_boxes, neighbor_sources_starts, - neighbor_sources_lists, src_weights): - pot = self.potential_zeros() - - for itgt_box, tgt_ibox in enumerate(target_boxes): - tgt_pslice = self._get_target_slice(tgt_ibox) - - src_sum = 0 - start, end = neighbor_sources_starts[itgt_box:itgt_box+2] - #print "DIR: %s <- %s" % (tgt_ibox, neighbor_sources_lists[start:end]) - for src_ibox in neighbor_sources_lists[start:end]: - src_pslice = self._get_source_slice(src_ibox) - - src_sum += np.sum(src_weights[src_pslice]) - - pot[tgt_pslice] = src_sum - - return pot - - @return_timing_data - def multipole_to_local(self, - level_start_target_or_target_parent_box_nrs, - target_or_target_parent_boxes, - starts, lists, mpole_exps): - local_exps = self.local_expansion_zeros() - - for itgt_box, tgt_ibox in enumerate(target_or_target_parent_boxes): - start, end = starts[itgt_box:itgt_box+2] - - contrib = 0 - #print tgt_ibox, "<-", lists[start:end] - for src_ibox in lists[start:end]: - contrib += mpole_exps[src_ibox] - - local_exps[tgt_ibox] += contrib - - return local_exps - - @return_timing_data - def eval_multipoles(self, - target_boxes_by_source_level, from_sep_smaller_nonsiblings_by_level, - mpole_exps): - pot = self.potential_zeros() - - for level, ssn in enumerate(from_sep_smaller_nonsiblings_by_level): - for itgt_box, tgt_ibox in \ - enumerate(target_boxes_by_source_level[level]): - tgt_pslice = self._get_target_slice(tgt_ibox) - - contrib = 0 - - start, end = ssn.starts[itgt_box:itgt_box+2] - for src_ibox in ssn.lists[start:end]: - contrib += mpole_exps[src_ibox] - - pot[tgt_pslice] += contrib - - return pot - - @return_timing_data - def form_locals(self, - 
level_start_target_or_target_parent_box_nrs, - target_or_target_parent_boxes, starts, lists, src_weights): - local_exps = self.local_expansion_zeros() - - for itgt_box, tgt_ibox in enumerate(target_or_target_parent_boxes): - start, end = starts[itgt_box:itgt_box+2] - - #print "LIST 4", tgt_ibox, "<-", lists[start:end] - contrib = 0 - for src_ibox in lists[start:end]: - src_pslice = self._get_source_slice(src_ibox) - - contrib += np.sum(src_weights[src_pslice]) - - local_exps[tgt_ibox] += contrib - - return local_exps - - @return_timing_data - def refine_locals(self, level_start_target_or_target_parent_box_nrs, - target_or_target_parent_boxes, local_exps): - - for target_lev in range(1, self.tree.nlevels): - start, stop = level_start_target_or_target_parent_box_nrs[ - target_lev:target_lev+2] - for ibox in target_or_target_parent_boxes[start:stop]: - local_exps[ibox] += local_exps[self.tree.box_parent_ids[ibox]] - - return local_exps - - @return_timing_data - def eval_locals(self, level_start_target_box_nrs, target_boxes, local_exps): - pot = self.potential_zeros() - - for ibox in target_boxes: - tgt_pslice = self._get_target_slice(ibox) - pot[tgt_pslice] += local_exps[ibox] - - return pot - - def finalize_potentials(self, potentials): - return potentials - - class ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder( ConstantOneExpansionWrangler): def __init__(self, tree, filtered_targets): ConstantOneExpansionWrangler.__init__(self, tree) self.filtered_targets = filtered_targets - def potential_zeros(self): + def output_zeros(self): return np.zeros(self.filtered_targets.nfiltered_targets, dtype=np.float64) def _get_target_slice(self, ibox):