diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index f9d116f88ce554d5173a25b3aceff5d9d8d41f4b..ba71c8a33d3c8fc6d8a1d2d4a1749d0f05ecfbd7 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -26,7 +26,7 @@ THE SOFTWARE. from six.moves import range, zip import numpy as np -from pytools import memoize_method +from pytools import memoize_method, RecordWithoutPickling import pyopencl as cl import pyopencl.array # noqa from functools import partial @@ -36,7 +36,32 @@ import logging logger = logging.getLogger(__name__) +class _LevelLoopData(RecordWithoutPickling): + + __slots__ = """ + morton_bin_counts + morton_nrs + box_start_flags + srcntgt_box_ids + split_box_ids + box_morton_bin_counts + box_srcntgt_starts + box_parent_ids + box_child_ids + box_centers + box_levels + box_srcntgt_counts_cumul + box_has_children + force_split_box + level_start_box_nrs + level_used_box_counts + have_oversize_split_box + have_upper_level_split_box + """.split() + + class TreeBuilder(object): + def __init__(self, context): """ :arg context: A :class:`pyopencl.Context`. @@ -71,6 +96,730 @@ class TreeBuilder(object): self.morton_nr_dtype, self.box_level_dtype, kind=kind) + @memoize_method + def axis_names(self, dimensions): + from boxtree.tools import AXIS_NAMES + return AXIS_NAMES[:dimensions] + + # {{{ process refine weights + + def process_refine_weights( + self, queue, allocator, refine_weights, max_leaf_refine_weight, + max_particles_in_box, nsrcntgts): + + prep_events = [] + + from boxtree.tree_build_kernels import refine_weight_dtype + + specified_max_particles_in_box = max_particles_in_box is not None + specified_refine_weights = refine_weights is not None and \ + max_leaf_refine_weight is not None + + if specified_max_particles_in_box and specified_refine_weights: + raise ValueError("may only specify one of max_particles_in_box and " + "refine_weights/max_leaf_refine_weight") + elif not specified_max_particles_in_box and not specified_refine_weights: + raise ValueError("must specify either max_particles_in_box or " + "refine_weights/max_leaf_refine_weight") + elif specified_max_particles_in_box: + refine_weights = ( + cl.array.empty( + queue, nsrcntgts, refine_weight_dtype, allocator=allocator) + .fill(1)) + event, = refine_weights.events + prep_events.append(event) + max_leaf_refine_weight = max_particles_in_box + elif specified_refine_weights: + if refine_weights.dtype != refine_weight_dtype: + raise TypeError("refine_weights must have dtype '%s'" + % refine_weight_dtype) + + if max_leaf_refine_weight < cl.array.max(refine_weights).get(): + raise ValueError( + "entries of refine_weights cannot exceed max_leaf_refine_weight") + if 0 > cl.array.min(refine_weights).get(): + raise ValueError("all entries of refine_weights must be nonnegative") + if max_leaf_refine_weight <= 0: + raise ValueError("max_leaf_refine_weight must be positive") + + total_refine_weight = cl.array.sum( + refine_weights, dtype=np.dtype(np.int64)).get() + + return ( + refine_weights, max_leaf_refine_weight, total_refine_weight, prep_events) + + # }}} + + # {{{ find and process bounding box + + def find_bounding_box_and_root_extent(self, srcntgts, srcntgt_radii, + coord_dtype, wait_for): + bbox, _ = self.bbox_finder(srcntgts, srcntgt_radii, wait_for=wait_for) + bbox = bbox.get() + + dimensions = srcntgts.shape[0] + axis_names = self.axis_names(dimensions) + + root_extent = max( + bbox["max_"+ax] - bbox["min_"+ax] + for ax in axis_names) * (1+TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) + + # make bbox square and 
slightly larger at the top, to ensure scaled + # coordinates are always < 1 + bbox_min = np.empty(dimensions, coord_dtype) + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_"+ax] + + bbox_max = bbox_min + root_extent + for i, ax in enumerate(axis_names): + bbox["max_"+ax] = bbox_max[i] + + return bbox, (bbox_min, bbox_max), root_extent + + # }}} + + # {{{ allocation helpers + + def make_empty_array_allocator(self, queue, allocator): + return partial(cl.array.empty, queue, allocator=allocator) + + def make_zeros_array_allocator(self, queue, allocator): + + def zeros(shape, dtype): + result = cl.array.zeros(queue, shape, dtype, allocator=allocator) + event, = result.events + return result, event + + return zeros + + # }}} + + # {{{ allocate level loop data + + def allocate_level_loop_data(self, queue, dimensions, allocator, nsrcntgts, + bbox, nboxes_guess, knl_info, box_id_dtype, + particle_id_dtype, coord_dtype, wait_for): + empty = self.make_empty_array_allocator(queue, allocator) + zeros = self.make_zeros_array_allocator(queue, allocator) + axis_names = self.axis_names(dimensions) + + prep_events = [] + + # {{{ allocate data + + logger.debug("allocating memory") + + # box-local morton bin counts for each particle at the current level + # only valid from scan -> split'n'sort + morton_bin_counts = empty(nsrcntgts, dtype=knl_info.morton_bin_count_dtype) + + # (local) morton nrs for each particle at the current level + # only valid from scan -> split'n'sort + morton_nrs = empty(nsrcntgts, dtype=self.morton_nr_dtype) + + # 0/1 segment flags + # invariant to sorting once set + # (particles are only reordered within a box) + # valid throughout computation + box_start_flags, evt = zeros(nsrcntgts, dtype=np.int8) + prep_events.append(evt) + srcntgt_box_ids, evt = zeros(nsrcntgts, dtype=box_id_dtype) + prep_events.append(evt) + + # /!\ IMPORTANT + # + # If you're allocating an array here that depends on nboxes_guess, or if + # your array contains box numbers, you have to write code for the + # following down below as well: + # + # * You *must* write reallocation code to handle box renumbering and + # reallocation triggered at the top of the level loop. + # + # * If your array persists after the level loop, you *must* write code + # to handle box renumbering and reallocation triggered by the box + # pruning step. 
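+        # For example, box_srcntgt_starts below is resized via
+        # my_realloc_zeros() in reallocate_level_loop_data_if_needed and
+        # copied through prune_empty() in the pruning step, while
+        # box-id-valued arrays such as box_parent_ids and box_child_ids
+        # additionally pass through the dst_box_id renumbering map in both
+        # places.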
+ + split_box_ids, evt = zeros(nboxes_guess, dtype=box_id_dtype) + prep_events.append(evt) + + # per-box morton bin counts + box_morton_bin_counts, evt = zeros(nboxes_guess, + dtype=knl_info.morton_bin_count_dtype) + prep_events.append(evt) + + # particle# at which each box starts + box_srcntgt_starts, evt = zeros(nboxes_guess, dtype=particle_id_dtype) + prep_events.append(evt) + + # pointer to parent box + box_parent_ids, evt = zeros(nboxes_guess, dtype=box_id_dtype) + prep_events.append(evt) + + # pointer to child box, by morton number + box_child_ids, evts = zip( + *(zeros(nboxes_guess, dtype=box_id_dtype) for d in range(2**dimensions))) + prep_events.extend(evts) + + # box centers, by dimension + box_centers, evts = zip( + *(zeros(nboxes_guess, dtype=coord_dtype) for d in range(dimensions))) + prep_events.extend(evts) + + # Initialize box_centers[0] to contain the root box's center + for d, (ax, evt) in enumerate(zip(axis_names, evts)): + center_ax = bbox["min_"+ax] + (bbox["max_"+ax] - bbox["min_"+ax]) / 2 + box_centers[d][0].fill(center_ax, wait_for=[evt]) + + # box -> level map + box_levels, evt = zeros(nboxes_guess, self.box_level_dtype) + prep_events.append(evt) + + # number of particles in each box + # needs to be globally initialized because empty boxes never get touched + box_srcntgt_counts_cumul, evt = zeros(nboxes_guess, dtype=particle_id_dtype) + prep_events.append(evt) + + # Initalize box 0 to contain all particles + box_srcntgt_counts_cumul[0].fill( + nsrcntgts, queue=queue, wait_for=[evt]) + + # box -> whether the box has a child. FIXME: use smaller integer type + box_has_children, evt = zeros(nboxes_guess, dtype=np.dtype(np.int32)) + prep_events.append(evt) + + # box -> whether the box needs a splitting to enforce level restriction. + # FIXME: use smaller integer type + force_split_box, evt = zeros(nboxes_guess + if knl_info.level_restrict + else 0, dtype=np.dtype(np.int32)) + prep_events.append(evt) + + # set parent of root box to itself + evt = cl.enqueue_copy( + queue, box_parent_ids.data, np.zeros((), dtype=box_parent_ids.dtype)) + prep_events.append(evt) + + nlevels_max = np.iinfo(self.box_level_dtype).max + + # level -> starting box on level + level_start_box_nrs, evt = zeros(nlevels_max, dtype=box_id_dtype) + prep_events.append(evt) + + # level -> number of used boxes on level + level_used_box_counts, evt = zeros(nlevels_max, dtype=box_id_dtype) + prep_events.append(evt) + + have_oversize_split_box, evt = zeros((), np.int32) + prep_events.append(evt) + + # True if and only if the level restrict kernel found a box to split in + # order to enforce level restriction. 
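+        # This is a scalar flag on the device; the host reads it back with
+        # .get() after each upward-pass iteration to decide whether any
+        # more upper-level boxes still need splitting.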
+ have_upper_level_split_box, evt = zeros((), np.int32) + prep_events.append(evt) + + # }}} + + return _LevelLoopData( + morton_bin_counts=morton_bin_counts, + morton_nrs=morton_nrs, + box_start_flags=box_start_flags, + srcntgt_box_ids=srcntgt_box_ids, + split_box_ids=split_box_ids, + box_morton_bin_counts=box_morton_bin_counts, + box_srcntgt_starts=box_srcntgt_starts, + box_parent_ids=box_parent_ids, + box_child_ids=box_child_ids, + box_centers=box_centers, + box_levels=box_levels, + box_srcntgt_counts_cumul=box_srcntgt_counts_cumul, + box_has_children=box_has_children, + force_split_box=force_split_box, + level_start_box_nrs=level_start_box_nrs, + level_used_box_counts=level_used_box_counts, + have_oversize_split_box=have_oversize_split_box, + have_upper_level_split_box=have_upper_level_split_box + ), prep_events + + # }}} + + # {{{ reallocate level loop data + + def reallocate_level_loop_data_if_needed(self, queue, dimensions, + allocator, level, lld, level_start_box_nrs, level_used_box_counts, + new_level_used_box_counts, new_level_leaf_counts, nboxes_guess, + lr_lookbehind, knl_info, debug, wait_for): + + def fin_debug(s): + if debug: + queue.finish() + + logger.debug(s) + + # The algorithm for deciding on level sizes is as follows: + # 1. Compute the minimal necessary size of each level, including the + # new level being created. + # 2. If level restricting, add padding to the new level being created. + # 3. Check if there is enough existing space for each level. + # 4. If any level does not have sufficient space, reallocate all levels: + # 4a. Compute new sizes of upper levels + # 4b. If level restricting, add padding to all levels. + + curr_upper_level_lengths = np.diff(level_start_box_nrs) + minimal_upper_level_lengths = np.max( + [new_level_used_box_counts[:-1], curr_upper_level_lengths], axis=0) + minimal_new_level_length = new_level_used_box_counts[-1] + + # Allocate extra space at the end of the current level for higher + # level leaves that may be split later. + # + # If there are no further levels to split (i.e. + # have_oversize_split_box = 0), then we do not need to allocate any + # extra space, since no new leaves can be created at the bottom + # level. + if knl_info.level_restrict and lld.have_oversize_split_box.get(): + # Currently undocumented. + minimal_new_level_length += sum( + 2**(l*dimensions) * new_level_leaf_counts[level - l] + for l in range(1, 1 + min(level, lr_lookbehind))) + + nboxes_minimal = \ + sum(minimal_upper_level_lengths) + minimal_new_level_length + + needs_renumbering = \ + (curr_upper_level_lengths < minimal_upper_level_lengths).any() + + # {{{ prepare for reallocation/renumbering + + if needs_renumbering: + assert knl_info.level_restrict + + # {{{ compute new level_start_box_nrs + + # Represents the amount of padding needed for upper levels. + upper_level_padding = np.zeros(level, dtype=int) + + # Recompute the level padding. 
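+            # A leaf on level ulevel - l can, through repeated splitting,
+            # produce up to 2**(l*dimensions) boxes on level ulevel, so with
+            # the default lr_lookbehind of 1 each level reserves
+            # 2**dimensions * new_level_leaf_counts[ulevel - 1] spare slots.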
+ for ulevel in range(level): + upper_level_padding[ulevel] = sum( + 2**(l*dimensions) * new_level_leaf_counts[ulevel - l] + for l in range( + 1, 1 + min(ulevel, lr_lookbehind))) + + new_upper_level_unused_box_counts = np.max( + [upper_level_padding, + minimal_upper_level_lengths - new_level_used_box_counts[:-1]], + axis=0) + + new_level_start_box_nrs = np.empty(level + 1, dtype=int) + new_level_start_box_nrs[0] = 0 + new_level_start_box_nrs[1:] = np.cumsum( + new_level_used_box_counts[:-1] + + new_upper_level_unused_box_counts) + + assert not (level_start_box_nrs == new_level_start_box_nrs).all() + + # }}} + + # {{{ set up reallocators + + old_box_count = level_start_box_nrs[-1] + # Where should I put this box? + box_id_dtype = lld.level_start_box_nrs.dtype + dst_box_id = cl.array.empty(queue, + shape=old_box_count, dtype=box_id_dtype) + + for level_start, new_level_start, level_len in zip( + level_start_box_nrs, new_level_start_box_nrs, + curr_upper_level_lengths): + dst_box_id[level_start:level_start + level_len] = \ + cl.array.arange(queue, + new_level_start, + new_level_start + level_len, + dtype=box_id_dtype) + + wait_for.extend(dst_box_id.events) + + realloc_array = partial(self.gappy_copy_and_map, + dst_indices=dst_box_id, range=slice(old_box_count), + debug=debug) + realloc_and_renumber_array = partial(self.gappy_copy_and_map, + dst_indices=dst_box_id, map_values=dst_box_id, + range=slice(old_box_count), debug=debug) + renumber_array = partial(self.map_values_kernel, dst_box_id) + + # }}} + + # Update level_start_box_nrs. This will be the + # level_start_box_nrs for the reallocated data. + + level_start_box_nrs = list(new_level_start_box_nrs) + lld.level_start_box_nrs[:level + 1] = \ + np.array(new_level_start_box_nrs, dtype=box_id_dtype) + level_start_box_nrs_updated = True + wait_for.extend(lld.level_start_box_nrs.events) + + nboxes_new = level_start_box_nrs[-1] + minimal_new_level_length + + del new_level_start_box_nrs + else: + from boxtree.tools import realloc_array + realloc_and_renumber_array = realloc_array + renumber_array = None + level_start_box_nrs_updated = False + nboxes_new = nboxes_minimal + + del nboxes_minimal + + # }}} + + # {{{ reallocate and/or renumber boxes if necessary + + needs_realloc = level_start_box_nrs_updated or nboxes_new > nboxes_guess + + if needs_realloc: + fin_debug("starting nboxes_guess increase") + + while nboxes_guess < nboxes_new: + nboxes_guess *= 2 + + def my_realloc_nocopy(ary): + return cl.array.empty(queue, allocator=allocator, + shape=nboxes_guess, dtype=ary.dtype) + + def my_realloc_zeros_nocopy(ary): + result = cl.array.zeros(queue, allocator=allocator, + shape=nboxes_guess, dtype=ary.dtype) + return result, result.events[0] + + my_realloc = partial(realloc_array, + queue, allocator, nboxes_guess, wait_for=wait_for) + my_realloc_zeros = partial(realloc_array, + queue, allocator, nboxes_guess, zero_fill=True, + wait_for=wait_for) + my_realloc_zeros_and_renumber = partial(realloc_and_renumber_array, + queue, allocator, nboxes_guess, zero_fill=True, + wait_for=wait_for) + + resize_events = [] + + split_box_ids = my_realloc_nocopy(lld.split_box_ids) + + # *Most*, but not *all* of the values in this array are + # rewritten when the morton scan is redone. Specifically, + # only the box morton bin counts of boxes on the level + # currently being processed are written-but we need to + # retain the box morton bin counts from the higher levels. 
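+            # That is why this array is reallocated with a zero-filled copy
+            # (my_realloc_zeros) rather than discarded and re-created the
+            # way split_box_ids is (my_realloc_nocopy) above.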
+ box_morton_bin_counts, evt = my_realloc_zeros( + lld.box_morton_bin_counts) + resize_events.append(evt) + + # force_split_box is unused unless level restriction is enabled. + if knl_info.level_restrict: + force_split_box, evt = my_realloc_zeros(lld.force_split_box) + resize_events.append(evt) + else: + force_split_box = lld.force_split_box + + box_srcntgt_starts, evt = my_realloc_zeros(lld.box_srcntgt_starts) + resize_events.append(evt) + + box_srcntgt_counts_cumul, evt = \ + my_realloc_zeros(lld.box_srcntgt_counts_cumul) + resize_events.append(evt) + + box_has_children, evt = my_realloc_zeros(lld.box_has_children) + resize_events.append(evt) + + box_centers, evts = zip( + *(my_realloc(ary) for ary in lld.box_centers)) + resize_events.extend(evts) + + box_child_ids, evts = zip( + *(my_realloc_zeros_and_renumber(ary) + for ary in lld.box_child_ids)) + resize_events.extend(evts) + + box_parent_ids, evt = my_realloc_zeros_and_renumber(lld.box_parent_ids) + resize_events.append(evt) + + if not level_start_box_nrs_updated: + box_levels, evt = my_realloc(lld.box_levels) + resize_events.append(evt) + else: + box_levels, evt = my_realloc_zeros_nocopy(lld.box_levels) + cl.wait_for_events([evt]) + for box_level, (level_start, level_end) in enumerate(zip( + level_start_box_nrs, level_start_box_nrs[1:])): + box_levels[level_start:level_end].fill(box_level) + resize_events.extend(lld.box_levels.events) + + if level_start_box_nrs_updated: + srcntgt_box_ids, evt = renumber_array(lld.srcntgt_box_ids) + resize_events.append(evt) + else: + srcntgt_box_ids = lld.srcntgt_box_ids + + lld = lld.copy( + split_box_ids=split_box_ids, + box_morton_bin_counts=box_morton_bin_counts, + force_split_box=force_split_box, + box_srcntgt_starts=box_srcntgt_starts, + box_srcntgt_counts_cumul=box_srcntgt_counts_cumul, + box_has_children=box_has_children, + box_centers=box_centers, + box_child_ids=box_child_ids, + box_parent_ids=box_parent_ids, + box_levels=box_levels, + srcntgt_box_ids=srcntgt_box_ids) + + cl.wait_for_events(resize_events) + + # }}} + + return lld, nboxes_guess, nboxes_new, level_start_box_nrs, needs_realloc + + # }}} + + # {{{ extract common args to kernels + + def get_common_args(self, lld, refine_weights, max_leaf_refine_weight, + level, bbox, user_srcntgt_ids, srcntgts, srcntgt_radii): + # These are "common" for a number of kernels, but not all of them. 
+ # * srcntgt_radii should be None if srcntgts_have_extent is False + common_args = ((lld.morton_bin_counts, + lld.morton_nrs, + lld.box_start_flags, + lld.srcntgt_box_ids, + lld.split_box_ids, + lld.box_morton_bin_counts, + refine_weights, + max_leaf_refine_weight, + lld.box_srcntgt_starts, + lld.box_srcntgt_counts_cumul, + lld.box_parent_ids, + lld.box_levels, + level, + bbox, + user_srcntgt_ids) + + tuple(srcntgts)) + if srcntgt_radii is not None: + common_args += (srcntgt_radii,) + + return common_args + + # }}} + + # {{{ morton count scan + + def morton_count_scan(self, queue, lld, refine_weights, + max_leaf_refine_weight, level, bbox, user_srcntgt_ids, + srcntgts, srcntgt_radii, srcntgts_have_extent, + stick_out_factor, knl_info, wait_for): + nsrcntgts = len(srcntgts[0]) + + morton_count_args = self.get_common_args( + lld, refine_weights, max_leaf_refine_weight, + level, bbox, user_srcntgt_ids, srcntgts, + srcntgt_radii) + + if srcntgts_have_extent: + morton_count_args += (stick_out_factor,) + + # writes: box_morton_bin_counts + evt = knl_info.morton_count_scan( + *morton_count_args, queue=queue, size=nsrcntgts, + wait_for=wait_for) + + return evt + + # }}} + + # {{{ split_box_id scan + + def split_box_id_scan(self, queue, lld, refine_weights, + max_leaf_refine_weight, level, level_start_box_nrs, + knl_info, wait_for): + # writes: box_has_children, split_box_ids + evt = knl_info.split_box_id_scan( + lld.srcntgt_box_ids, + lld.box_srcntgt_counts_cumul, + lld.box_morton_bin_counts, + refine_weights, + max_leaf_refine_weight, + lld.box_levels, + lld.level_start_box_nrs, + lld.level_used_box_counts, + lld.force_split_box, + level, + + # output: + lld.box_has_children, + lld.split_box_ids, + lld.have_oversize_split_box, + + queue=queue, + size=level_start_box_nrs[level], + wait_for=wait_for) + + return evt + + # }}} + + # {{{ recompute used box and leaf counts + + def recompute_used_box_and_leaf_counts(self, lld, dimensions, + level_start_box_nrs, level_used_box_counts, level_leaf_counts): + # Run this after the split_box_id scan. + + # The last split_box_id on each level tells us how many boxes are + # needed at the next level. + new_level_used_box_counts = [1] + for level_start_box_id in level_start_box_nrs[1:]: + last_box_on_prev_level = level_start_box_id - 1 + new_level_used_box_counts.append( + # FIXME: Get this all at once. + int(lld.split_box_ids[last_box_on_prev_level].get()) + - level_start_box_id) + + # New leaf count = + # old leaf count + # + nr. new boxes from splitting parent's leaves + # - nr. 
new boxes from splitting current level's leaves / 2**d + level_used_box_counts_diff = (new_level_used_box_counts + - np.append(level_used_box_counts, [0])) + new_level_leaf_counts = (level_leaf_counts + + level_used_box_counts_diff[:-1] + - level_used_box_counts_diff[1:] // 2 ** dimensions) + new_level_leaf_counts = np.append( + new_level_leaf_counts, + [level_used_box_counts_diff[-1]]) + del level_used_box_counts_diff + + return new_level_used_box_counts, new_level_leaf_counts + + # }}} + + # {{{ update level_start_box_nrs, level_used_box_counts, level_leaf_counts + + def update_level_statistics_with_new_level( + self, lld, level, nboxes_new, level_start_box_nrs, level_used_box_counts, + level_leaf_counts, debug): + wait_for = [] + + new_level_start_box_nrs = list(level_start_box_nrs) + new_level_start_box_nrs.append(nboxes_new) + lld.level_start_box_nrs[level + 1].fill(nboxes_new) + wait_for.extend(lld.level_start_box_nrs.events) + + new_level_used_box_counts = list(level_used_box_counts) + lld.level_used_box_counts[:level + 1] = ( + np.array(new_level_used_box_counts, + dtype=lld.level_used_box_counts.dtype)) + wait_for.extend(lld.level_used_box_counts.events) + + new_level_leaf_counts = list(level_leaf_counts) + if debug: + # Ensure leaf count consistency. + for level_start, level_nboxes, leaf_count in zip( + level_start_box_nrs, + level_used_box_counts, + level_leaf_counts): + if level_nboxes == 0: + assert leaf_count == 0 + continue + nleaves_actual = level_nboxes - int( + cl.array.sum(lld.box_has_children[ + level_start:level_start + level_nboxes]).get()) + assert leaf_count == nleaves_actual + + return ( + new_level_start_box_nrs, + new_level_used_box_counts, + new_level_leaf_counts, + wait_for) + + # }}} + + # {{{ split boxes + + def split_boxes( + self, lld, refine_weights, max_leaf_refine_weight, + level, bbox, user_srcntgt_ids, srcntgts, srcntgt_radii, + root_extent, nsrcntgts, level_start_box_nrs, + level_used_box_counts, knl_info, wait_for, debug): + common_args = self.get_common_args( + lld, refine_weights, max_leaf_refine_weight, + level, bbox, user_srcntgt_ids, srcntgts, + srcntgt_radii) + + # {{{ split boxes + + box_splitter_args = ( + common_args + + (lld.box_has_children, lld.force_split_box, root_extent) + + lld.box_child_ids + + lld.box_centers) + + evt = knl_info.box_splitter_kernel(*box_splitter_args, + range=slice(level_start_box_nrs[-1]), + wait_for=wait_for) + + evts = [evt] + + # Mark the levels of boxes added for padding (these were not updated + # by the box splitter kernel). 
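+        # The padding boxes occupy the index range
+        # [last_used_box, level_start_box_nrs[-1]); when no padding was
+        # reserved on this level, that slice is empty and the fill below is
+        # a no-op.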
+ last_used_box = level_start_box_nrs[-2] + level_used_box_counts[-1] + lld.box_levels[last_used_box:level_start_box_nrs[-1]].fill(level) + + evts.extend(lld.box_levels.events) + + if debug: + lld.box_levels.finish() + level_bl_chunk = lld.box_levels.get()[ + level_start_box_nrs[-2]:level_start_box_nrs[-1]] + assert (level_bl_chunk == level).all() + del level_bl_chunk + + if debug: + assert (lld.box_srcntgt_starts.get() < nsrcntgts).all() + + # }}} + + return evts + + # }}} + + # {{{ renumber particles within split boxes + + def renumber_particles( + self, lld, refine_weights, max_leaf_refine_weight, + level, bbox, user_srcntgt_ids, srcntgts, srcntgt_radii, + nsrcntgts, knl_info, wait_for): + common_args = self.get_common_args( + lld, refine_weights, max_leaf_refine_weight, + level, bbox, user_srcntgt_ids, srcntgts, + srcntgt_radii) + + new_user_srcntgt_ids = cl.array.empty_like(user_srcntgt_ids) + new_srcntgt_box_ids = cl.array.empty_like(lld.srcntgt_box_ids) + + particle_renumberer_args = ( + common_args + + (lld.box_has_children, lld.force_split_box, + new_user_srcntgt_ids, new_srcntgt_box_ids)) + + evt = knl_info.particle_renumberer_kernel(*particle_renumberer_args, + range=slice(nsrcntgts), wait_for=wait_for) + + user_srcntgt_ids = new_user_srcntgt_ids + del new_user_srcntgt_ids + lld = lld.copy(srcntgt_box_ids=new_srcntgt_box_ids) + del new_srcntgt_box_ids + + return lld, user_srcntgt_ids, evt + + # }}} + # {{{ run control def __call__(self, queue, particles, kind="adaptive", @@ -134,9 +883,6 @@ class TreeBuilder(object): dimensions = len(particles) - from boxtree.tools import AXIS_NAMES - axis_names = AXIS_NAMES[:dimensions] - sources_are_targets = targets is None sources_have_extent = source_radii is not None targets_have_extent = target_radii is not None @@ -176,13 +922,6 @@ class TreeBuilder(object): # }}} - empty = partial(cl.array.empty, queue, allocator=allocator) - - def zeros(shape, dtype): - result = cl.array.zeros(queue, shape, dtype, allocator=allocator) - event, = result.events - return result, event - knl_info = self.get_kernel_info(dimensions, coord_dtype, particle_id_dtype, box_id_dtype, sources_are_targets, srcntgts_have_extent, @@ -190,6 +929,9 @@ class TreeBuilder(object): logger.info("tree build: start") + empty = self.make_empty_array_allocator(queue, allocator) + zeros = self.make_zeros_array_allocator(queue, allocator) + # {{{ combine sources and targets into one array, if necessary prep_events = [] @@ -261,91 +1003,22 @@ class TreeBuilder(object): # }}} - # {{{ process refine_weights - - from boxtree.tree_build_kernels import refine_weight_dtype - - specified_max_particles_in_box = max_particles_in_box is not None - specified_refine_weights = refine_weights is not None and \ - max_leaf_refine_weight is not None - - if specified_max_particles_in_box and specified_refine_weights: - raise ValueError("may only specify one of max_particles_in_box and " - "refine_weights/max_leaf_refine_weight") - elif not specified_max_particles_in_box and not specified_refine_weights: - raise ValueError("must specify either max_particles_in_box or " - "refine_weights/max_leaf_refine_weight") - elif specified_max_particles_in_box: - refine_weights = ( - cl.array.empty( - queue, nsrcntgts, refine_weight_dtype, allocator=allocator) - .fill(1)) - event, = refine_weights.events - prep_events.append(event) - max_leaf_refine_weight = max_particles_in_box - elif specified_refine_weights: - if refine_weights.dtype != refine_weight_dtype: - raise TypeError("refine_weights must have dtype 
'%s'" - % refine_weight_dtype) - - if max_leaf_refine_weight < cl.array.max(refine_weights).get(): - raise ValueError( - "entries of refine_weights cannot exceed max_leaf_refine_weight") - if 0 > cl.array.min(refine_weights).get(): - raise ValueError("all entries of refine_weights must be nonnegative") - if max_leaf_refine_weight <= 0: - raise ValueError("max_leaf_refine_weight must be positive") - - total_refine_weight = cl.array.sum( - refine_weights, dtype=np.dtype(np.int64)).get() + ( + refine_weights, + max_leaf_refine_weight, + total_refine_weight, + more_prep_events) = ( + self.process_refine_weights( + queue, allocator, refine_weights, max_leaf_refine_weight, + max_particles_in_box, nsrcntgts)) del max_particles_in_box - del specified_max_particles_in_box - del specified_refine_weights - - # }}} - - # {{{ find and process bounding box - - bbox, _ = self.bbox_finder(srcntgts, srcntgt_radii, wait_for=wait_for) - bbox = bbox.get() - - root_extent = max( - bbox["max_"+ax] - bbox["min_"+ax] - for ax in axis_names) * (1+TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) - - # make bbox square and slightly larger at the top, to ensure scaled - # coordinates are always < 1 - bbox_min = np.empty(dimensions, coord_dtype) - for i, ax in enumerate(axis_names): - bbox_min[i] = bbox["min_"+ax] - - bbox_max = bbox_min + root_extent - for i, ax in enumerate(axis_names): - bbox["max_"+ax] = bbox_max[i] - - # }}} - - # {{{ allocate data - - logger.debug("allocating memory") - - # box-local morton bin counts for each particle at the current level - # only valid from scan -> split'n'sort - morton_bin_counts = empty(nsrcntgts, dtype=knl_info.morton_bin_count_dtype) - # (local) morton nrs for each particle at the current level - # only valid from scan -> split'n'sort - morton_nrs = empty(nsrcntgts, dtype=self.morton_nr_dtype) + prep_events.extend(more_prep_events) - # 0/1 segment flags - # invariant to sorting once set - # (particles are only reordered within a box) - # valid throughout computation - box_start_flags, evt = zeros(nsrcntgts, dtype=np.int8) - prep_events.append(evt) - srcntgt_box_ids, evt = zeros(nsrcntgts, dtype=box_id_dtype) - prep_events.append(evt) + bbox, (bbox_min, bbox_max), root_extent = ( + self.find_bounding_box_and_root_extent( + srcntgts, srcntgt_radii, coord_dtype, wait_for)) # Outside nboxes_guess feeding is solely for debugging purposes, # to test the reallocation code. @@ -357,90 +1030,17 @@ class TreeBuilder(object): assert nboxes_guess > 0 - # /!\ IMPORTANT - # - # If you're allocating an array here that depends on nboxes_guess, or if - # your array contains box numbers, you have to write code for the - # following down below as well: - # - # * You *must* write reallocation code to handle box renumbering and - # reallocation triggered at the top of the level loop. - # - # * If your array persists after the level loop, you *must* write code - # to handle box renumbering and reallocation triggered by the box - # pruning step. 
- - split_box_ids, evt = zeros(nboxes_guess, dtype=box_id_dtype) - prep_events.append(evt) - - # per-box morton bin counts - box_morton_bin_counts, evt = zeros(nboxes_guess, - dtype=knl_info.morton_bin_count_dtype) - prep_events.append(evt) - - # particle# at which each box starts - box_srcntgt_starts, evt = zeros(nboxes_guess, dtype=particle_id_dtype) - prep_events.append(evt) - - # pointer to parent box - box_parent_ids, evt = zeros(nboxes_guess, dtype=box_id_dtype) - prep_events.append(evt) - - # pointer to child box, by morton number - box_child_ids, evts = zip( - *(zeros(nboxes_guess, dtype=box_id_dtype) for d in range(2**dimensions))) - prep_events.extend(evts) - - # box centers, by dimension - box_centers, evts = zip( - *(zeros(nboxes_guess, dtype=coord_dtype) for d in range(dimensions))) - prep_events.extend(evts) - - # Initialize box_centers[0] to contain the root box's center - for d, (ax, evt) in enumerate(zip(axis_names, evts)): - center_ax = bbox["min_"+ax] + (bbox["max_"+ax] - bbox["min_"+ax]) / 2 - box_centers[d][0].fill(center_ax, wait_for=[evt]) - - # box -> level map - box_levels, evt = zeros(nboxes_guess, self.box_level_dtype) - prep_events.append(evt) - - # number of particles in each box - # needs to be globally initialized because empty boxes never get touched - box_srcntgt_counts_cumul, evt = zeros(nboxes_guess, dtype=particle_id_dtype) - prep_events.append(evt) - - # Initalize box 0 to contain all particles - box_srcntgt_counts_cumul[0].fill( - nsrcntgts, queue=queue, wait_for=[evt]) - - # box -> whether the box has a child. FIXME: use smaller integer type - box_has_children, evt = zeros(nboxes_guess, dtype=np.dtype(np.int32)) - prep_events.append(evt) + lld, more_prep_events = self.allocate_level_loop_data(queue, dimensions, + allocator, nsrcntgts, bbox, nboxes_guess, knl_info, box_id_dtype, + particle_id_dtype, coord_dtype, wait_for) - # box -> whether the box needs a splitting to enforce level restriction. - # FIXME: use smaller integer type - force_split_box, evt = zeros(nboxes_guess - if knl_info.level_restrict - else 0, dtype=np.dtype(np.int32)) - prep_events.append(evt) - - # set parent of root box to itself - evt = cl.enqueue_copy( - queue, box_parent_ids.data, np.zeros((), dtype=box_parent_ids.dtype)) - prep_events.append(evt) - - nlevels_max = np.iinfo(self.box_level_dtype).max + prep_events.extend(more_prep_events) - # level -> starting box on level - level_start_box_nrs_dev, evt = zeros(nlevels_max, dtype=box_id_dtype) - prep_events.append(evt) + wait_for = prep_events - # level -> number of used boxes on level - level_used_box_counts_dev, evt = zeros(nlevels_max, dtype=box_id_dtype) - prep_events.append(evt) + from pytools import div_ceil - # }}} + # {{{ level loop def fin_debug(s): if debug: @@ -449,26 +1049,13 @@ class TreeBuilder(object): logger.debug(s) from pytools.obj_array import make_obj_array - have_oversize_split_box, evt = zeros((), np.int32) - prep_events.append(evt) - - # True if and only if the level restrict kernel found a box to split in - # order to enforce level restriction. - have_upper_level_split_box, evt = zeros((), np.int32) - prep_events.append(evt) - - wait_for = prep_events - - from pytools import div_ceil - - # {{{ level loop # Level 0 starts at 0 and always contains box 0 and nothing else. # Level 1 therefore starts at 1. 
level_start_box_nrs = [0, 1] - level_start_box_nrs_dev[0] = 0 - level_start_box_nrs_dev[1] = 1 - wait_for.extend(level_start_box_nrs_dev.events) + lld.level_start_box_nrs[0] = 0 + lld.level_start_box_nrs[1] = 1 + wait_for.extend(lld.level_start_box_nrs.events) # This counts the number of boxes that have been used per level. Note # that this could be fewer than the actual number of boxes allocated to @@ -476,8 +1063,8 @@ class TreeBuilder(object): # are pre-allocated for a level than used since we may decide to split # parent level boxes later). level_used_box_counts = [1] - level_used_box_counts_dev[0] = 1 - wait_for.extend(level_used_box_counts_dev.events) + lld.level_used_box_counts[0] = 1 + wait_for.extend(lld.level_used_box_counts.events) # level -> number of leaf boxes on level. Initially the root node is a # leaf. @@ -509,316 +1096,74 @@ class TreeBuilder(object): # regarding this). This flag is set to True when that happens. final_level_restrict_iteration = False + # When level-restricting, specifies the number of levels to look upwards + # for unsplit boxes while reserving space in the reallocator. + # + # Currently undocumented. + lr_lookbehind = kwargs.get("lr_lookbehind", 1) + while level: + # {{{ check loop invariants if debugging enabled + if debug: - # More invariants: assert level == len(level_start_box_nrs) - 1 assert level == len(level_used_box_counts) assert level == len(level_leaf_counts) + assert (lld.level_start_box_nrs[:level].get() + == np.array(level_start_box_nrs[:level])).all() + assert (lld.level_used_box_counts[:level].get() + == np.array(level_used_box_counts)).all() + + # }}} + if level > np.iinfo(self.box_level_dtype).max: raise RuntimeError("level count exceeded maximum") - common_args = ((morton_bin_counts, morton_nrs, - box_start_flags, - srcntgt_box_ids, split_box_ids, - box_morton_bin_counts, - refine_weights, - max_leaf_refine_weight, - box_srcntgt_starts, box_srcntgt_counts_cumul, - box_parent_ids, box_levels, - level, bbox, - user_srcntgt_ids) - + tuple(srcntgts) - + ((srcntgt_radii,) if srcntgts_have_extent else ()) - ) - fin_debug("morton count scan") - morton_count_args = common_args - if srcntgts_have_extent: - morton_count_args += (stick_out_factor,) + evt = self.morton_count_scan(queue, lld, refine_weights, + max_leaf_refine_weight, level, bbox, user_srcntgt_ids, + srcntgts, srcntgt_radii, srcntgts_have_extent, + stick_out_factor, knl_info, wait_for) - # writes: box_morton_bin_counts - evt = knl_info.morton_count_scan( - *morton_count_args, queue=queue, size=nsrcntgts, - wait_for=wait_for) wait_for = [evt] fin_debug("split box id scan") - # writes: box_has_children, split_box_ids - evt = knl_info.split_box_id_scan( - srcntgt_box_ids, - box_srcntgt_counts_cumul, - box_morton_bin_counts, - refine_weights, - max_leaf_refine_weight, - box_levels, - level_start_box_nrs_dev, - level_used_box_counts_dev, - force_split_box, - level, - - # output: - box_has_children, - split_box_ids, - have_oversize_split_box, + evt = self.split_box_id_scan(queue, lld, refine_weights, + max_leaf_refine_weight, level, level_start_box_nrs, + knl_info, wait_for) - queue=queue, - size=level_start_box_nrs[level], - wait_for=wait_for) wait_for = [evt] - # {{{ compute new level_used_box_counts, level_leaf_counts - - # The last split_box_id on each level tells us how many boxes are - # needed at the next level. 
- new_level_used_box_counts = [1] - for level_start_box_id in level_start_box_nrs[1:]: - last_box_on_prev_level = level_start_box_id - 1 - new_level_used_box_counts.append( - # FIXME: Get this all at once. - int(split_box_ids[last_box_on_prev_level].get()) - - level_start_box_id) - - # New leaf count = - # old leaf count - # + nr. new boxes from splitting parent's leaves - # - nr. new boxes from splitting current level's leaves / 2**d - level_used_box_counts_diff = (new_level_used_box_counts - - np.append(level_used_box_counts, [0])) - new_level_leaf_counts = (level_leaf_counts - + level_used_box_counts_diff[:-1] - - level_used_box_counts_diff[1:] // 2 ** dimensions) - new_level_leaf_counts = np.append( - new_level_leaf_counts, - [level_used_box_counts_diff[-1]]) - del level_used_box_counts_diff + new_level_used_box_counts, new_level_leaf_counts = \ + self.recompute_used_box_and_leaf_counts( + lld, dimensions, level_start_box_nrs, level_used_box_counts, + level_leaf_counts) - # }}} + if debug: + assert (np.array(new_level_leaf_counts) >= 0).all() + assert (np.array(new_level_used_box_counts) >= 0).all() - # Assumption: Everything between here and the top of the loop must + # ASSUMPTION: Everything between here and the top of the loop must # be repeatable, so that in an out-of-memory situation, we can just # rerun this bit of the code after reallocating and a minimal reset # procedure. - # The algorithm for deciding on level sizes is as follows: - # 1. Compute the minimal necessary size of each level, including the - # new level being created. - # 2. If level restricting, add padding to the new level being created. - # 3. Check if there is enough existing space for each level. - # 4. If any level does not have sufficient space, reallocate all levels: - # 4a. Compute new sizes of upper levels - # 4b. If level restricting, add padding to all levels. - - curr_upper_level_lengths = np.diff(level_start_box_nrs) - minimal_upper_level_lengths = np.max( - [new_level_used_box_counts[:-1], curr_upper_level_lengths], axis=0) - minimal_new_level_length = new_level_used_box_counts[-1] - - # Allocate extra space at the end of the current level for higher - # level leaves that may be split later. - # - # If there are no further levels to split (i.e. - # have_oversize_split_box = 0), then we do not need to allocate any - # extra space, since no new leaves can be created at the bottom - # level. - if knl_info.level_restrict and have_oversize_split_box.get(): - # Currently undocumented. - lr_lookbehind_levels = kwargs.get("lr_lookbehind", 1) - minimal_new_level_length += sum( - 2**(l*dimensions) * new_level_leaf_counts[level - l] - for l in range(1, 1 + min(level, lr_lookbehind_levels))) - - nboxes_minimal = \ - sum(minimal_upper_level_lengths) + minimal_new_level_length - - needs_renumbering = \ - (curr_upper_level_lengths < minimal_upper_level_lengths).any() - - # {{{ prepare for reallocation/renumbering - - if needs_renumbering: - assert knl_info.level_restrict - - # {{{ compute new level_start_box_nrs - - # Represents the amount of padding needed for upper levels. - upper_level_padding = np.zeros(level, dtype=int) - - # Recompute the level padding. 
- for ulevel in range(level): - upper_level_padding[ulevel] = sum( - 2**(l*dimensions) * new_level_leaf_counts[ulevel - l] - for l in range( - 1, 1 + min(ulevel, lr_lookbehind_levels))) - - new_upper_level_unused_box_counts = np.max( - [upper_level_padding, - minimal_upper_level_lengths - new_level_used_box_counts[:-1]], - axis=0) - - new_level_start_box_nrs = np.empty(level + 1, dtype=int) - new_level_start_box_nrs[0] = 0 - new_level_start_box_nrs[1:] = np.cumsum( - new_level_used_box_counts[:-1] - + new_upper_level_unused_box_counts) - - assert not (level_start_box_nrs == new_level_start_box_nrs).all() - - # }}} - - # {{{ set up reallocators - - old_box_count = level_start_box_nrs[-1] - # Where should I put this box? - dst_box_id = cl.array.empty(queue, - shape=old_box_count, dtype=box_id_dtype) - - for level_start, new_level_start, level_len in zip( - level_start_box_nrs, new_level_start_box_nrs, - curr_upper_level_lengths): - dst_box_id[level_start:level_start + level_len] = \ - cl.array.arange(queue, - new_level_start, - new_level_start + level_len, - dtype=box_id_dtype) - - wait_for.extend(dst_box_id.events) - - realloc_array = partial(self.gappy_copy_and_map, - dst_indices=dst_box_id, range=slice(old_box_count), - debug=debug) - realloc_and_renumber_array = partial(self.gappy_copy_and_map, - dst_indices=dst_box_id, map_values=dst_box_id, - range=slice(old_box_count), debug=debug) - renumber_array = partial(self.map_values_kernel, dst_box_id) - - # }}} - - # Update level_start_box_nrs. This will be the - # level_start_box_nrs for the reallocated data. - - level_start_box_nrs = list(new_level_start_box_nrs) - level_start_box_nrs_dev[:level + 1] = \ - np.array(new_level_start_box_nrs, dtype=box_id_dtype) - level_start_box_nrs_updated = True - wait_for.extend(level_start_box_nrs_dev.events) - - nboxes_new = level_start_box_nrs[-1] + minimal_new_level_length - - del new_level_start_box_nrs - else: - from boxtree.tools import realloc_array - realloc_and_renumber_array = realloc_array - renumber_array = None - level_start_box_nrs_updated = False - nboxes_new = nboxes_minimal - - del nboxes_minimal - - # }}} - - # {{{ reallocate and/or renumber boxes if necessary - - if level_start_box_nrs_updated or nboxes_new > nboxes_guess: - fin_debug("starting nboxes_guess increase") - - while nboxes_guess < nboxes_new: - nboxes_guess *= 2 - - def my_realloc_nocopy(ary): - return cl.array.empty(queue, allocator=allocator, - shape=nboxes_guess, dtype=ary.dtype) - - def my_realloc_zeros_nocopy(ary): - result = cl.array.zeros(queue, allocator=allocator, - shape=nboxes_guess, dtype=ary.dtype) - return result, result.events[0] - - my_realloc = partial(realloc_array, - queue, allocator, nboxes_guess, wait_for=wait_for) - my_realloc_zeros = partial(realloc_array, - queue, allocator, nboxes_guess, zero_fill=True, - wait_for=wait_for) - my_realloc_zeros_and_renumber = partial(realloc_and_renumber_array, - queue, allocator, nboxes_guess, zero_fill=True, - wait_for=wait_for) - - resize_events = [] - - split_box_ids = my_realloc_nocopy(split_box_ids) - - # *Most*, but not *all* of the values in this array are - # rewritten when the morton scan is redone. Specifically, - # only the box morton bin counts of boxes on the level - # currently being processed are written-but we need to - # retain the box morton bin counts from the higher levels. - box_morton_bin_counts, evt = my_realloc_zeros( - box_morton_bin_counts) - resize_events.append(evt) - - # force_split_box is unused unless level restriction is enabled. 
- if knl_info.level_restrict: - force_split_box, evt = my_realloc_zeros(force_split_box) - resize_events.append(evt) - - box_srcntgt_starts, evt = my_realloc_zeros(box_srcntgt_starts) - resize_events.append(evt) - - box_srcntgt_counts_cumul, evt = \ - my_realloc_zeros(box_srcntgt_counts_cumul) - resize_events.append(evt) - - box_has_children, evt = my_realloc_zeros(box_has_children) - resize_events.append(evt) - - box_centers, evts = zip( - *(my_realloc(ary) for ary in box_centers)) - resize_events.extend(evts) - - box_child_ids, evts = zip( - *(my_realloc_zeros_and_renumber(ary) - for ary in box_child_ids)) - resize_events.extend(evts) - - box_parent_ids, evt = my_realloc_zeros_and_renumber(box_parent_ids) - resize_events.append(evt) - - if not level_start_box_nrs_updated: - box_levels, evt = my_realloc(box_levels) - resize_events.append(evt) - else: - box_levels, evt = my_realloc_zeros_nocopy(box_levels) - cl.wait_for_events([evt]) - for box_level, (level_start, level_end) in enumerate(zip( - level_start_box_nrs, level_start_box_nrs[1:])): - box_levels[level_start:level_end].fill(box_level) - resize_events.extend(box_levels.events) - - if level_start_box_nrs_updated: - srcntgt_box_ids, evt = renumber_array(srcntgt_box_ids) - resize_events.append(evt) - - del my_realloc_zeros - del my_realloc_nocopy - del my_realloc_zeros_nocopy - del renumber_array - - # Can't del on Py2.7 - these are used in generator expressions - # above, which are nested scopes - my_realloc = None - my_realloc_zeros_and_renumber = None + lld, nboxes_guess, nboxes_new, level_start_box_nrs, reallocated = \ + self.reallocate_level_loop_data_if_needed( + queue, dimensions, allocator, level, lld, level_start_box_nrs, + level_used_box_counts, new_level_used_box_counts, + new_level_leaf_counts, nboxes_guess, + lr_lookbehind, knl_info, debug, wait_for) + if reallocated: # retry logger.info("nboxes_guess exceeded: " "enlarged allocations, restarting level") - continue - # }}} - logger.info("LEVEL %d -> %d boxes" % (level, nboxes_new)) assert ( @@ -837,98 +1182,34 @@ class TreeBuilder(object): break assert final_level_restrict_iteration - # {{{ update level_start_box_nrs, level_used_box_counts - - level_start_box_nrs.append(nboxes_new) - level_start_box_nrs_dev[level + 1].fill(nboxes_new) - wait_for.extend(level_start_box_nrs_dev.events) - - level_used_box_counts = new_level_used_box_counts - level_used_box_counts_dev[:level + 1] = \ - np.array(level_used_box_counts, dtype=box_id_dtype) - wait_for.extend(level_used_box_counts_dev.events) - - level_leaf_counts = new_level_leaf_counts - if debug: - for level_start, level_nboxes, leaf_count in zip( - level_start_box_nrs, - level_used_box_counts, - level_leaf_counts): - if level_nboxes == 0: - assert leaf_count == 0 - continue - nleaves_actual = level_nboxes - int( - cl.array.sum(box_has_children[ - level_start:level_start + level_nboxes]).get()) - assert leaf_count == nleaves_actual - - # Can't del in Py2.7 - see note below - new_level_leaf_counts = None - - # }}} - - del nboxes_new - del new_level_used_box_counts - - # {{{ split boxes - - box_splitter_args = ( - common_args - + (box_has_children, force_split_box, root_extent) - + box_child_ids - + box_centers) - - evt = knl_info.box_splitter_kernel(*box_splitter_args, - range=slice(level_start_box_nrs[-1]), - wait_for=wait_for) - - wait_for = [evt] - - fin_debug("box splitter") - - # Mark the levels of boxes added for padding (these were not updated - # by the box splitter kernel). 
- last_used_box = level_start_box_nrs[-2] + level_used_box_counts[-1] - box_levels[last_used_box:level_start_box_nrs[-1]].fill(level) - - wait_for.extend(box_levels.events) - - if debug: - box_levels.finish() - level_bl_chunk = box_levels.get()[ - level_start_box_nrs[-2]:level_start_box_nrs[-1]] - assert (level_bl_chunk == level).all() - del level_bl_chunk - - if debug: - assert (box_srcntgt_starts.get() < nsrcntgts).all() + (level_start_box_nrs, + level_used_box_counts, + level_leaf_counts, + evts) = self.update_level_statistics_with_new_level( + lld, level, nboxes_new, level_start_box_nrs, + new_level_used_box_counts, new_level_leaf_counts, debug) - # }}} + wait_for.extend(evts) - # {{{ renumber particles within split boxes + evts = self.split_boxes( + lld, refine_weights, max_leaf_refine_weight, level, bbox, + user_srcntgt_ids, srcntgts, srcntgt_radii, root_extent, + nsrcntgts, level_start_box_nrs, level_used_box_counts, knl_info, + wait_for, debug) - new_user_srcntgt_ids = cl.array.empty_like(user_srcntgt_ids) - new_srcntgt_box_ids = cl.array.empty_like(srcntgt_box_ids) + wait_for = evts - particle_renumberer_args = ( - common_args - + (box_has_children, force_split_box, - new_user_srcntgt_ids, new_srcntgt_box_ids)) + fin_debug("split boxes") - evt = knl_info.particle_renumberer_kernel(*particle_renumberer_args, - range=slice(nsrcntgts), wait_for=wait_for) + lld, user_srcntgt_ids, evt = self.renumber_particles( + lld, refine_weights, max_leaf_refine_weight, level, bbox, + user_srcntgt_ids, srcntgts, srcntgt_radii, nsrcntgts, knl_info, + wait_for) wait_for = [evt] fin_debug("particle renumbering") - user_srcntgt_ids = new_user_srcntgt_ids - del new_user_srcntgt_ids - srcntgt_box_ids = new_srcntgt_box_ids - del new_srcntgt_box_ids - - # }}} - # {{{ enforce level restriction on upper levels if final_level_restrict_iteration: @@ -942,7 +1223,7 @@ class TreeBuilder(object): # reallocation code. In order to fix this issue, the box # numbering and reallocation code needs to be accessible after # the final level restriction is done. - assert int(have_oversize_split_box.get()) == 0 + assert int(lld.have_oversize_split_box.get()) == 0 assert level_used_box_counts[-1] == 0 del level_used_box_counts[-1] del level_start_box_nrs[-1] @@ -958,9 +1239,9 @@ class TreeBuilder(object): # Upward pass - check if leaf boxes at higher levels need # further splitting. 
- assert len(force_split_box) > 0 - force_split_box.fill(0) - wait_for.extend(force_split_box.events) + assert len(lld.force_split_box) > 0 + lld.force_split_box.fill(0) + wait_for.extend(lld.force_split_box.events) did_upper_level_split = False @@ -981,28 +1262,28 @@ class TreeBuilder(object): upper_level_slice = slice( upper_level_start, upper_level_start + upper_level_box_count) - have_upper_level_split_box.fill(0) - wait_for.extend(have_upper_level_split_box.events) + lld.have_upper_level_split_box.fill(0) + wait_for.extend(lld.have_upper_level_split_box.events) # writes: force_split_box, have_upper_level_split_box evt = level_restrict_kernel( upper_level, root_extent, - box_has_children, - force_split_box, - have_upper_level_split_box, - *(box_child_ids + box_centers), + lld.box_has_children, + lld.force_split_box, + lld.have_upper_level_split_box, + *(lld.box_child_ids + lld.box_centers), slice=upper_level_slice, wait_for=wait_for) wait_for = [evt] if debug: - force_split_box.finish() + lld.force_split_box.finish() boxes_split.append(int(cl.array.sum( - force_split_box[upper_level_slice]).get())) + lld.force_split_box[upper_level_slice]).get())) - if int(have_upper_level_split_box.get()) == 0: + if int(lld.have_upper_level_split_box.get()) == 0: break did_upper_level_split = True @@ -1018,7 +1299,9 @@ class TreeBuilder(object): .format(level=level_, nboxes_split=nboxes_split)) del boxes_split - if int(have_oversize_split_box.get()) == 0 and did_upper_level_split: + if ( + int(lld.have_oversize_split_box.get()) == 0 + and did_upper_level_split): # We are in the situation where there are boxes left to # split on upper levels, and the level loop is done creating # lower levels. @@ -1031,19 +1314,19 @@ class TreeBuilder(object): # }}} - if not int(have_oversize_split_box.get()): + if not int(lld.have_oversize_split_box.get()): logger.debug("no boxes left to split") break level += 1 - have_oversize_split_box.fill(0) + lld.have_oversize_split_box.fill(0) # {{{ check that nonchild part of box_morton_bin_counts is consistent if debug and 0: - h_box_morton_bin_counts = box_morton_bin_counts.get() - h_box_srcntgt_counts_cumul = box_srcntgt_counts_cumul.get() - h_box_child_ids = tuple(bci.get() for bci in box_child_ids) + h_box_morton_bin_counts = lld.box_morton_bin_counts.get() + h_box_srcntgt_counts_cumul = lld.box_srcntgt_counts_cumul.get() + h_box_child_ids = tuple(bci.get() for bci in lld.box_child_ids) has_mismatch = False for ibox in range(level_start_box_nrs[-1]): @@ -1102,8 +1385,8 @@ class TreeBuilder(object): evt = knl_info.extract_nonchild_srcntgt_count_kernel( # input - box_morton_bin_counts, - box_srcntgt_counts_cumul, + lld.box_morton_bin_counts, + lld.box_srcntgt_counts_cumul, highest_possibly_split_box_nr, # output @@ -1116,7 +1399,7 @@ class TreeBuilder(object): if debug: h_box_srcntgt_counts_nonchild = box_srcntgt_counts_nonchild.get() - h_box_srcntgt_counts_cumul = box_srcntgt_counts_cumul.get() + h_box_srcntgt_counts_cumul = lld.box_srcntgt_counts_cumul.get() assert (h_box_srcntgt_counts_nonchild <= h_box_srcntgt_counts_cumul[:nboxes]).all() @@ -1128,8 +1411,8 @@ class TreeBuilder(object): # }}} - del morton_nrs - del box_morton_bin_counts + del lld.morton_nrs + del lld.box_morton_bin_counts # {{{ prune empty/unused leaf boxes @@ -1150,7 +1433,7 @@ class TreeBuilder(object): nboxes_post_prune_dev = empty((), dtype=box_id_dtype) evt = knl_info.find_prune_indices_kernel( - box_srcntgt_counts_cumul, + lld.box_srcntgt_counts_cumul, src_box_id, dst_box_id, nboxes_post_prune_dev, 
size=nboxes, wait_for=wait_for) wait_for = [evt] @@ -1197,23 +1480,24 @@ class TreeBuilder(object): src_indices=src_box_id, range=slice(nboxes_post_prune), debug=debug) - box_srcntgt_starts, evt = prune_empty(box_srcntgt_starts) + box_srcntgt_starts, evt = prune_empty(lld.box_srcntgt_starts) prune_events.append(evt) - box_srcntgt_counts_cumul, evt = prune_empty(box_srcntgt_counts_cumul) + box_srcntgt_counts_cumul, evt = prune_empty(lld.box_srcntgt_counts_cumul) prune_events.append(evt) if debug and prune_empty_leaves: assert (box_srcntgt_counts_cumul.get() > 0).all() srcntgt_box_ids, evt = self.map_values_kernel( - dst_box_id, srcntgt_box_ids) + dst_box_id, lld.srcntgt_box_ids) prune_events.append(evt) - box_parent_ids, evt = prune_empty(box_parent_ids, map_values=dst_box_id) + box_parent_ids, evt = prune_empty( + lld.box_parent_ids, map_values=dst_box_id) prune_events.append(evt) - box_levels, evt = prune_empty(box_levels) + box_levels, evt = prune_empty(lld.box_levels) prune_events.append(evt) if srcntgts_have_extent: @@ -1221,37 +1505,49 @@ class TreeBuilder(object): box_srcntgt_counts_nonchild) prune_events.append(evt) - box_has_children, evt = prune_empty(box_has_children) + box_has_children, evt = prune_empty(lld.box_has_children) prune_events.append(evt) box_child_ids, evts = zip( *(prune_empty(ary, map_values=dst_box_id) - for ary in box_child_ids)) + for ary in lld.box_child_ids)) prune_events.extend(evts) box_centers, evts = zip( - *(prune_empty(ary) for ary in box_centers)) + *(prune_empty(ary) for ary in lld.box_centers)) prune_events.extend(evts) # Update box counts and level start box indices. box_levels.finish() evt = knl_info.find_level_box_counts_kernel( - box_levels, level_used_box_counts_dev) + box_levels, lld.level_used_box_counts) cl.wait_for_events([evt]) nlevels = len(level_used_box_counts) - level_used_box_counts = level_used_box_counts_dev[:nlevels].get() + level_used_box_counts = lld.level_used_box_counts[:nlevels].get() level_start_box_nrs = [0] level_start_box_nrs.extend(np.cumsum(level_used_box_counts)) - level_start_box_nrs_dev[:nlevels + 1] = np.array( + lld.level_start_box_nrs[:nlevels + 1] = np.array( level_start_box_nrs, dtype=box_id_dtype) - prune_events.extend(level_start_box_nrs_dev.events) + prune_events.extend(lld.level_start_box_nrs.events) wait_for = prune_events else: + box_srcntgt_starts = lld.box_srcntgt_starts + box_srcntgt_counts_cumul = lld.box_srcntgt_counts_cumul + srcntgt_box_ids = lld.srcntgt_box_ids + box_parent_ids = lld.box_parent_ids + box_levels = lld.box_levels + if srcntgts_have_extent: + box_srcntgt_counts_nonchild = lld.box_srcntgt_counts_nonchild + + box_has_children = lld.box_has_children + box_child_ids = lld.box_child_ids + box_centers = lld.box_centers + logger.info("skipping empty-leaf pruning") nboxes_post_prune = nboxes @@ -1273,7 +1569,7 @@ class TreeBuilder(object): box_srcntgt_counts_cumul if srcntgts_have_extent: box_source_counts_nonchild = box_target_counts_nonchild = \ - box_srcntgt_counts_nonchild + lld.box_srcntgt_counts_nonchild else: source_numbers = empty(nsrcntgts, particle_id_dtype) @@ -1555,7 +1851,7 @@ class TreeBuilder(object): bounding_box=(bbox_min, bbox_max), level_start_box_nrs=level_start_box_nrs, - level_start_box_nrs_dev=level_start_box_nrs_dev, + level_start_box_nrs_dev=lld.level_start_box_nrs, sources=sources, targets=targets,