From d32eb9319f7531d2ffea83787c97aff7a272b413 Mon Sep 17 00:00:00 2001 From: xywei Date: Sat, 11 Nov 2017 09:58:13 -0600 Subject: [PATCH 01/16] Expose bounding box from tree builder --- boxtree/tree_build.py | 826 +++++++++++++++++++++++------------------- 1 file changed, 455 insertions(+), 371 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 2566dc4..afa9c31 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -22,7 +22,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ - from six.moves import range, zip import numpy as np @@ -59,27 +58,42 @@ class TreeBuilder(object): ROOT_EXTENT_STRETCH_FACTOR = 1e-4 @memoize_method - def get_kernel_info(self, dimensions, coord_dtype, - particle_id_dtype, box_id_dtype, - sources_are_targets, srcntgts_extent_norm, - kind): + def get_kernel_info(self, dimensions, coord_dtype, particle_id_dtype, + box_id_dtype, sources_are_targets, srcntgts_extent_norm, + kind): from boxtree.tree_build_kernels import get_tree_build_kernel_info - return get_tree_build_kernel_info(self.context, dimensions, coord_dtype, - particle_id_dtype, box_id_dtype, - sources_are_targets, srcntgts_extent_norm, - self.morton_nr_dtype, self.box_level_dtype, + return get_tree_build_kernel_info( + self.context, + dimensions, + coord_dtype, + particle_id_dtype, + box_id_dtype, + sources_are_targets, + srcntgts_extent_norm, + self.morton_nr_dtype, + self.box_level_dtype, kind=kind) # {{{ run control - def __call__(self, queue, particles, kind="adaptive", - max_particles_in_box=None, allocator=None, debug=False, - targets=None, source_radii=None, target_radii=None, - stick_out_factor=None, refine_weights=None, - max_leaf_refine_weight=None, wait_for=None, - extent_norm=None, - **kwargs): + def __call__(self, + queue, + particles, + kind="adaptive", + max_particles_in_box=None, + allocator=None, + debug=False, + targets=None, + source_radii=None, + target_radii=None, + stick_out_factor=None, + refine_weights=None, + max_leaf_refine_weight=None, + wait_for=None, + extent_norm=None, + bbox=None, + **kwargs): """ :arg queue: a :class:`pyopencl.CommandQueue` instance :arg particles: an object array of (XYZ) point coordinate arrays. @@ -118,6 +132,8 @@ class TreeBuilder(object): execution. :arg extent_norm: ``"l2"`` or ``"linf"``. Indicates the norm with respect to which particle stick-out is measured. See :attr:`Tree.extent_norm`. + :arg bbox: When specified, the bounding box is used for tree + building. Otherwise, the bounding box is determined from particles. :arg kwargs: Used internally for debugging. :returns: a tuple ``(tree, event)``, where *tree* is an instance of @@ -127,7 +143,9 @@ class TreeBuilder(object): # {{{ input processing - if kind not in ["adaptive", "adaptive-level-restricted", "non-adaptive"]: + if kind not in [ + "adaptive", "adaptive-level-restricted", "non-adaptive" + ]: raise ValueError("unknown tree kind \"{0}\"".format(kind)) # we'll modify this below, so copy it @@ -149,8 +167,8 @@ class TreeBuilder(object): extent_norm = "linf" if extent_norm not in ["linf", "l2"]: - raise ValueError("unexpected value of 'extent_norm': %s" - % extent_norm) + raise ValueError( + "unexpected value of 'extent_norm': %s" % extent_norm) srcntgts_extent_norm = extent_norm srcntgts_have_extent = sources_have_extent or targets_have_extent @@ -161,7 +179,7 @@ class TreeBuilder(object): if srcntgts_extent_norm and targets is None: raise ValueError("must specify targets when specifying " - "any kind of radii") + "any kind of radii") from pytools import single_valued particle_id_dtype = np.int32 @@ -176,25 +194,26 @@ class TreeBuilder(object): nsrcntgts = nsources + ntargets if source_radii is not None: - if source_radii.shape != (nsources,): + if source_radii.shape != (nsources, ): raise ValueError("source_radii has an invalid shape") if source_radii.dtype != coord_dtype: raise TypeError("dtypes of coordinate arrays and " - "source_radii must agree") + "source_radii must agree") if target_radii is not None: - if target_radii.shape != (ntargets,): + if target_radii.shape != (ntargets, ): raise ValueError("target_radii has an invalid shape") if target_radii.dtype != coord_dtype: raise TypeError("dtypes of coordinate arrays and " - "target_radii must agree") + "target_radii must agree") if sources_have_extent or targets_have_extent: if stick_out_factor is None: - raise ValueError("if sources or targets have extent, " - "stick_out_factor must be explicitly specified") + raise ValueError( + "if sources or targets have extent, " + "stick_out_factor must be explicitly specified") else: stick_out_factor = 0 @@ -207,10 +226,14 @@ class TreeBuilder(object): event, = result.events return result, event - knl_info = self.get_kernel_info(dimensions, coord_dtype, - particle_id_dtype, box_id_dtype, - sources_are_targets, srcntgts_extent_norm, - kind=kind) + knl_info = self.get_kernel_info( + dimensions, + coord_dtype, + particle_id_dtype, + box_id_dtype, + sources_are_targets, + srcntgts_extent_norm, + kind=kind) logger.info("tree build: start") @@ -240,7 +263,7 @@ class TreeBuilder(object): if target_coord_dtype != coord_dtype: raise TypeError("sources and targets must have same coordinate " - "dtype") + "dtype") def combine_srcntgt_arrays(ary1, ary2=None): if ary2 is None: @@ -264,10 +287,11 @@ class TreeBuilder(object): srcntgts = make_obj_array([ combine_srcntgt_arrays(src_i, tgt_i) for src_i, tgt_i in zip(particles, targets) - ]) + ]) if srcntgts_have_extent: - srcntgt_radii = combine_srcntgt_arrays(source_radii, target_radii) + srcntgt_radii = combine_srcntgt_arrays(source_radii, + target_radii) else: srcntgt_radii = None @@ -276,8 +300,8 @@ class TreeBuilder(object): del particles - user_srcntgt_ids = cl.array.arange(queue, nsrcntgts, dtype=particle_id_dtype, - allocator=allocator) + user_srcntgt_ids = cl.array.arange( + queue, nsrcntgts, dtype=particle_id_dtype, allocator=allocator) evt, = user_srcntgt_ids.events wait_for.append(evt) @@ -295,33 +319,34 @@ class TreeBuilder(object): if specified_max_particles_in_box and specified_refine_weights: raise ValueError("may only specify one of max_particles_in_box and " - "refine_weights/max_leaf_refine_weight") + "refine_weights/max_leaf_refine_weight") elif not specified_max_particles_in_box and not specified_refine_weights: raise ValueError("must specify either max_particles_in_box or " - "refine_weights/max_leaf_refine_weight") + "refine_weights/max_leaf_refine_weight") elif specified_max_particles_in_box: - refine_weights = ( - cl.array.empty( - queue, nsrcntgts, refine_weight_dtype, allocator=allocator) - .fill(1)) + refine_weights = (cl.array.empty( + queue, nsrcntgts, refine_weight_dtype, allocator=allocator) + .fill(1)) event, = refine_weights.events prep_events.append(event) max_leaf_refine_weight = max_particles_in_box elif specified_refine_weights: if refine_weights.dtype != refine_weight_dtype: - raise TypeError("refine_weights must have dtype '%s'" - % refine_weight_dtype) + raise TypeError( + "refine_weights must have dtype '%s'" % refine_weight_dtype) if max_leaf_refine_weight < cl.array.max(refine_weights).get(): raise ValueError( - "entries of refine_weights cannot exceed max_leaf_refine_weight") + "entries of refine_weights cannot exceed max_leaf_refine_weight" + ) if 0 > cl.array.min(refine_weights).get(): - raise ValueError("all entries of refine_weights must be nonnegative") + raise ValueError( + "all entries of refine_weights must be nonnegative") if max_leaf_refine_weight <= 0: raise ValueError("max_leaf_refine_weight must be positive") total_refine_weight = cl.array.sum( - refine_weights, dtype=np.dtype(np.int64)).get() + refine_weights, dtype=np.dtype(np.int64)).get() del max_particles_in_box del specified_max_particles_in_box @@ -331,22 +356,35 @@ class TreeBuilder(object): # {{{ find and process bounding box - bbox, _ = self.bbox_finder(srcntgts, srcntgt_radii, wait_for=wait_for) - bbox = bbox.get() + if bbox is None: + bbox, _ = self.bbox_finder( + srcntgts, srcntgt_radii, wait_for=wait_for) + bbox = bbox.get() - root_extent = max( - bbox["max_"+ax] - bbox["min_"+ax] - for ax in axis_names) * (1+TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) + root_extent = max( + bbox["max_" + ax] - bbox["min_" + ax] for ax in axis_names) * ( + 1 + TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) - # make bbox square and slightly larger at the top, to ensure scaled - # coordinates are always < 1 - bbox_min = np.empty(dimensions, coord_dtype) - for i, ax in enumerate(axis_names): - bbox_min[i] = bbox["min_"+ax] + # make bbox square and slightly larger at the top, to ensure scaled + # coordinates are always < 1 + bbox_min = np.empty(dimensions, coord_dtype) + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_" + ax] - bbox_max = bbox_min + root_extent - for i, ax in enumerate(axis_names): - bbox["max_"+ax] = bbox_max[i] + bbox_max = bbox_min + root_extent + for i, ax in enumerate(axis_names): + bbox["max_" + ax] = bbox_max[i] + else: + # all coordinates must be at the interior (boundary excluded) + # of the bbox + # Currently only cubic domain has ensured mesh reconstruction. + bbox_min = np.empty(dimensions, coord_dtype) + bbox_max = np.empty(dimensions, coord_dtype) + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_" + ax] + bbox_max[i] = bbox["max_" + ax] + assert (bbox_min[i] < bbox_max[i]) + root_extent = max(bbox_max - bbox_min) # }}} @@ -356,7 +394,8 @@ class TreeBuilder(object): # box-local morton bin counts for each particle at the current level # only valid from scan -> split'n'sort - morton_bin_counts = empty(nsrcntgts, dtype=knl_info.morton_bin_count_dtype) + morton_bin_counts = empty( + nsrcntgts, dtype=knl_info.morton_bin_count_dtype) # (local) morton nrs for each particle at the current level # only valid from scan -> split'n'sort @@ -376,8 +415,8 @@ class TreeBuilder(object): nboxes_guess = kwargs.get("nboxes_guess") if nboxes_guess is None: nboxes_guess = 2**dimensions * ( - (max_leaf_refine_weight + total_refine_weight - 1) - // max_leaf_refine_weight) + (max_leaf_refine_weight + total_refine_weight - 1 + ) // max_leaf_refine_weight) assert nboxes_guess > 0 @@ -398,8 +437,8 @@ class TreeBuilder(object): prep_events.append(evt) # per-box morton bin counts - box_morton_bin_counts, evt = zeros(nboxes_guess, - dtype=knl_info.morton_bin_count_dtype) + box_morton_bin_counts, evt = zeros( + nboxes_guess, dtype=knl_info.morton_bin_count_dtype) prep_events.append(evt) # particle# at which each box starts @@ -411,18 +450,19 @@ class TreeBuilder(object): prep_events.append(evt) # pointer to child box, by morton number - box_child_ids, evts = zip( - *(zeros(nboxes_guess, dtype=box_id_dtype) for d in range(2**dimensions))) + box_child_ids, evts = zip(*(zeros(nboxes_guess, dtype=box_id_dtype) + for d in range(2**dimensions))) prep_events.extend(evts) # box centers, by dimension - box_centers, evts = zip( - *(zeros(nboxes_guess, dtype=coord_dtype) for d in range(dimensions))) + box_centers, evts = zip(*(zeros(nboxes_guess, dtype=coord_dtype) + for d in range(dimensions))) prep_events.extend(evts) # Initialize box_centers[0] to contain the root box's center for d, (ax, evt) in enumerate(zip(axis_names, evts)): - center_ax = bbox["min_"+ax] + (bbox["max_"+ax] - bbox["min_"+ax]) / 2 + center_ax = bbox["min_" + + ax] + (bbox["max_" + ax] - bbox["min_" + ax]) / 2 box_centers[d][0].fill(center_ax, wait_for=[evt]) # box -> level map @@ -431,12 +471,12 @@ class TreeBuilder(object): # number of particles in each box # needs to be globally initialized because empty boxes never get touched - box_srcntgt_counts_cumul, evt = zeros(nboxes_guess, dtype=particle_id_dtype) + box_srcntgt_counts_cumul, evt = zeros( + nboxes_guess, dtype=particle_id_dtype) prep_events.append(evt) # Initalize box 0 to contain all particles - box_srcntgt_counts_cumul[0].fill( - nsrcntgts, queue=queue, wait_for=[evt]) + box_srcntgt_counts_cumul[0].fill(nsrcntgts, queue=queue, wait_for=[evt]) # box -> whether the box has a child. FIXME: use smaller integer type box_has_children, evt = zeros(nboxes_guess, dtype=np.dtype(np.int32)) @@ -444,14 +484,14 @@ class TreeBuilder(object): # box -> whether the box needs a splitting to enforce level restriction. # FIXME: use smaller integer type - force_split_box, evt = zeros(nboxes_guess - if knl_info.level_restrict - else 0, dtype=np.dtype(np.int32)) + force_split_box, evt = zeros( + nboxes_guess if knl_info.level_restrict else 0, + dtype=np.dtype(np.int32)) prep_events.append(evt) # set parent of root box to itself - evt = cl.enqueue_copy( - queue, box_parent_ids.data, np.zeros((), dtype=box_parent_ids.dtype)) + evt = cl.enqueue_copy(queue, box_parent_ids.data, + np.zeros((), dtype=box_parent_ids.dtype)) prep_events.append(evt) nlevels_max = np.iinfo(self.box_level_dtype).max @@ -543,55 +583,50 @@ class TreeBuilder(object): if level > np.iinfo(self.box_level_dtype).max: raise RuntimeError("level count exceeded maximum") - common_args = ((morton_bin_counts, morton_nrs, - box_start_flags, - srcntgt_box_ids, split_box_ids, - box_morton_bin_counts, - refine_weights, - max_leaf_refine_weight, - box_srcntgt_starts, box_srcntgt_counts_cumul, - box_parent_ids, box_levels, - level, bbox, - user_srcntgt_ids) - + tuple(srcntgts) - + ((srcntgt_radii,) if srcntgts_have_extent else ()) - ) + common_args = ( + (morton_bin_counts, morton_nrs, box_start_flags, + srcntgt_box_ids, split_box_ids, box_morton_bin_counts, + refine_weights, max_leaf_refine_weight, box_srcntgt_starts, + box_srcntgt_counts_cumul, box_parent_ids, box_levels, level, + bbox, user_srcntgt_ids) + tuple(srcntgts) + + ((srcntgt_radii, ) if srcntgts_have_extent else ())) fin_debug("morton count scan") morton_count_args = common_args if srcntgts_have_extent: - morton_count_args += (stick_out_factor,) + morton_count_args += (stick_out_factor, ) # writes: box_morton_bin_counts evt = knl_info.morton_count_scan( - *morton_count_args, queue=queue, size=nsrcntgts, - wait_for=wait_for) + *morton_count_args, + queue=queue, + size=nsrcntgts, + wait_for=wait_for) wait_for = [evt] fin_debug("split box id scan") # writes: box_has_children, split_box_ids evt = knl_info.split_box_id_scan( - srcntgt_box_ids, - box_srcntgt_counts_cumul, - box_morton_bin_counts, - refine_weights, - max_leaf_refine_weight, - box_levels, - level_start_box_nrs_dev, - level_used_box_counts_dev, - force_split_box, - level, + srcntgt_box_ids, + box_srcntgt_counts_cumul, + box_morton_bin_counts, + refine_weights, + max_leaf_refine_weight, + box_levels, + level_start_box_nrs_dev, + level_used_box_counts_dev, + force_split_box, + level, - # output: - box_has_children, - split_box_ids, - have_oversize_split_box, - - queue=queue, - size=level_start_box_nrs[level], - wait_for=wait_for) + # output: + box_has_children, + split_box_ids, + have_oversize_split_box, + queue=queue, + size=level_start_box_nrs[level], + wait_for=wait_for) wait_for = [evt] # {{{ compute new level_used_box_counts, level_leaf_counts @@ -603,21 +638,20 @@ class TreeBuilder(object): last_box_on_prev_level = level_start_box_id - 1 new_level_used_box_counts.append( # FIXME: Get this all at once. - int(split_box_ids[last_box_on_prev_level].get()) - - level_start_box_id) + int(split_box_ids[last_box_on_prev_level].get()) - + level_start_box_id) # New leaf count = # old leaf count # + nr. new boxes from splitting parent's leaves # - nr. new boxes from splitting current level's leaves / 2**d - level_used_box_counts_diff = (new_level_used_box_counts - - np.append(level_used_box_counts, [0])) - new_level_leaf_counts = (level_leaf_counts - + level_used_box_counts_diff[:-1] - - level_used_box_counts_diff[1:] // 2 ** dimensions) - new_level_leaf_counts = np.append( - new_level_leaf_counts, - [level_used_box_counts_diff[-1]]) + level_used_box_counts_diff = (new_level_used_box_counts - + np.append(level_used_box_counts, [0])) + new_level_leaf_counts = ( + level_leaf_counts + level_used_box_counts_diff[:-1] - + level_used_box_counts_diff[1:] // 2**dimensions) + new_level_leaf_counts = np.append(new_level_leaf_counts, + [level_used_box_counts_diff[-1]]) del level_used_box_counts_diff # }}} @@ -638,7 +672,8 @@ class TreeBuilder(object): curr_upper_level_lengths = np.diff(level_start_box_nrs) minimal_upper_level_lengths = np.max( - [new_level_used_box_counts[:-1], curr_upper_level_lengths], axis=0) + [new_level_used_box_counts[:-1], curr_upper_level_lengths], + axis=0) minimal_new_level_length = new_level_used_box_counts[-1] # Allocate extra space at the end of the current level for higher @@ -652,7 +687,7 @@ class TreeBuilder(object): # Currently undocumented. lr_lookbehind_levels = kwargs.get("lr_lookbehind", 1) minimal_new_level_length += sum( - 2**(l*dimensions) * new_level_leaf_counts[level - l] + 2**(l * dimensions) * new_level_leaf_counts[level - l] for l in range(1, 1 + min(level, lr_lookbehind_levels))) nboxes_minimal = \ @@ -674,22 +709,25 @@ class TreeBuilder(object): # Recompute the level padding. for ulevel in range(level): upper_level_padding[ulevel] = sum( - 2**(l*dimensions) * new_level_leaf_counts[ulevel - l] - for l in range( - 1, 1 + min(ulevel, lr_lookbehind_levels))) + 2**(l * dimensions) * new_level_leaf_counts[ulevel - l] + for l in range(1, + 1 + min(ulevel, lr_lookbehind_levels))) new_upper_level_unused_box_counts = np.max( - [upper_level_padding, - minimal_upper_level_lengths - new_level_used_box_counts[:-1]], + [ + upper_level_padding, minimal_upper_level_lengths - + new_level_used_box_counts[:-1] + ], axis=0) new_level_start_box_nrs = np.empty(level + 1, dtype=int) new_level_start_box_nrs[0] = 0 - new_level_start_box_nrs[1:] = np.cumsum( - new_level_used_box_counts[:-1] - + new_upper_level_unused_box_counts) + new_level_start_box_nrs[ + 1:] = np.cumsum(new_level_used_box_counts[:-1] + + new_upper_level_unused_box_counts) - assert not (level_start_box_nrs == new_level_start_box_nrs).all() + assert not ( + level_start_box_nrs == new_level_start_box_nrs).all() # }}} @@ -697,8 +735,8 @@ class TreeBuilder(object): old_box_count = level_start_box_nrs[-1] # Where should I put this box? - dst_box_id = cl.array.empty(queue, - shape=old_box_count, dtype=box_id_dtype) + dst_box_id = cl.array.empty( + queue, shape=old_box_count, dtype=box_id_dtype) for level_start, new_level_start, level_len in zip( level_start_box_nrs, new_level_start_box_nrs, @@ -711,12 +749,17 @@ class TreeBuilder(object): wait_for.extend(dst_box_id.events) - realloc_array = partial(self.gappy_copy_and_map, - dst_indices=dst_box_id, range=slice(old_box_count), - debug=debug) - realloc_and_renumber_array = partial(self.gappy_copy_and_map, - dst_indices=dst_box_id, map_values=dst_box_id, - range=slice(old_box_count), debug=debug) + realloc_array = partial( + self.gappy_copy_and_map, + dst_indices=dst_box_id, + range=slice(old_box_count), + debug=debug) + realloc_and_renumber_array = partial( + self.gappy_copy_and_map, + dst_indices=dst_box_id, + map_values=dst_box_id, + range=slice(old_box_count), + debug=debug) renumber_array = partial(self.map_values_kernel, dst_box_id) # }}} @@ -753,22 +796,40 @@ class TreeBuilder(object): nboxes_guess *= 2 def my_realloc_nocopy(ary): - return cl.array.empty(queue, allocator=allocator, - shape=nboxes_guess, dtype=ary.dtype) + return cl.array.empty( + queue, + allocator=allocator, + shape=nboxes_guess, + dtype=ary.dtype) def my_realloc_zeros_nocopy(ary): - result = cl.array.zeros(queue, allocator=allocator, - shape=nboxes_guess, dtype=ary.dtype) + result = cl.array.zeros( + queue, + allocator=allocator, + shape=nboxes_guess, + dtype=ary.dtype) return result, result.events[0] - my_realloc = partial(realloc_array, - queue, allocator, nboxes_guess, wait_for=wait_for) - my_realloc_zeros = partial(realloc_array, - queue, allocator, nboxes_guess, zero_fill=True, - wait_for=wait_for) - my_realloc_zeros_and_renumber = partial(realloc_and_renumber_array, - queue, allocator, nboxes_guess, zero_fill=True, - wait_for=wait_for) + my_realloc = partial( + realloc_array, + queue, + allocator, + nboxes_guess, + wait_for=wait_for) + my_realloc_zeros = partial( + realloc_array, + queue, + allocator, + nboxes_guess, + zero_fill=True, + wait_for=wait_for) + my_realloc_zeros_and_renumber = partial( + realloc_and_renumber_array, + queue, + allocator, + nboxes_guess, + zero_fill=True, + wait_for=wait_for) resize_events = [] @@ -780,7 +841,7 @@ class TreeBuilder(object): # currently being processed are written-but we need to # retain the box morton bin counts from the higher levels. box_morton_bin_counts, evt = my_realloc_zeros( - box_morton_bin_counts) + box_morton_bin_counts) resize_events.append(evt) # force_split_box is unused unless level restriction is enabled. @@ -798,16 +859,16 @@ class TreeBuilder(object): box_has_children, evt = my_realloc_zeros(box_has_children) resize_events.append(evt) - box_centers, evts = zip( - *(my_realloc(ary) for ary in box_centers)) + box_centers, evts = zip(*(my_realloc(ary) + for ary in box_centers)) resize_events.extend(evts) - box_child_ids, evts = zip( - *(my_realloc_zeros_and_renumber(ary) - for ary in box_child_ids)) + box_child_ids, evts = zip(*(my_realloc_zeros_and_renumber(ary) + for ary in box_child_ids)) resize_events.extend(evts) - box_parent_ids, evt = my_realloc_zeros_and_renumber(box_parent_ids) + box_parent_ids, evt = my_realloc_zeros_and_renumber( + box_parent_ids) resize_events.append(evt) if not level_start_box_nrs_updated: @@ -816,8 +877,8 @@ class TreeBuilder(object): else: box_levels, evt = my_realloc_zeros_nocopy(box_levels) cl.wait_for_events([evt]) - for box_level, (level_start, level_end) in enumerate(zip( - level_start_box_nrs, level_start_box_nrs[1:])): + for box_level, (level_start, level_end) in enumerate( + zip(level_start_box_nrs, level_start_box_nrs[1:])): box_levels[level_start:level_end].fill(box_level) resize_events.extend(box_levels.events) @@ -845,10 +906,8 @@ class TreeBuilder(object): logger.info("LEVEL %d -> %d boxes" % (level, nboxes_new)) - assert ( - level_start_box_nrs[-1] != nboxes_new or - srcntgts_have_extent or - final_level_restrict_iteration) + assert (level_start_box_nrs[-1] != nboxes_new + or srcntgts_have_extent or final_level_restrict_iteration) if level_start_box_nrs[-1] == nboxes_new: # We haven't created new boxes in this level loop trip. @@ -875,15 +934,14 @@ class TreeBuilder(object): level_leaf_counts = new_level_leaf_counts if debug: for level_start, level_nboxes, leaf_count in zip( - level_start_box_nrs, - level_used_box_counts, + level_start_box_nrs, level_used_box_counts, level_leaf_counts): if level_nboxes == 0: assert leaf_count == 0 continue nleaves_actual = level_nboxes - int( - cl.array.sum(box_has_children[ - level_start:level_start + level_nboxes]).get()) + cl.array.sum(box_has_children[level_start:level_start + + level_nboxes]).get()) assert leaf_count == nleaves_actual # Can't del in Py2.7 - see note below @@ -896,15 +954,14 @@ class TreeBuilder(object): # {{{ split boxes - box_splitter_args = ( - common_args - + (box_has_children, force_split_box, root_extent) - + box_child_ids - + box_centers) + box_splitter_args = (common_args + + (box_has_children, force_split_box, + root_extent) + box_child_ids + box_centers) - evt = knl_info.box_splitter_kernel(*box_splitter_args, - range=slice(level_start_box_nrs[-1]), - wait_for=wait_for) + evt = knl_info.box_splitter_kernel( + *box_splitter_args, + range=slice(level_start_box_nrs[-1]), + wait_for=wait_for) wait_for = [evt] @@ -919,8 +976,8 @@ class TreeBuilder(object): if debug: box_levels.finish() - level_bl_chunk = box_levels.get()[ - level_start_box_nrs[-2]:level_start_box_nrs[-1]] + level_bl_chunk = box_levels.get()[level_start_box_nrs[-2]: + level_start_box_nrs[-1]] assert (level_bl_chunk == level).all() del level_bl_chunk @@ -935,12 +992,13 @@ class TreeBuilder(object): new_srcntgt_box_ids = cl.array.empty_like(srcntgt_box_ids) particle_renumberer_args = ( - common_args - + (box_has_children, force_split_box, - new_user_srcntgt_ids, new_srcntgt_box_ids)) + common_args + (box_has_children, force_split_box, + new_user_srcntgt_ids, new_srcntgt_box_ids)) - evt = knl_info.particle_renumberer_kernel(*particle_renumberer_args, - range=slice(nsrcntgts), wait_for=wait_for) + evt = knl_info.particle_renumberer_kernel( + *particle_renumberer_args, + range=slice(nsrcntgts), + wait_for=wait_for) wait_for = [evt] @@ -978,7 +1036,7 @@ class TreeBuilder(object): LEVEL_STEP = 10 # noqa if level % LEVEL_STEP == 1: level_restrict_kernel = knl_info.level_restrict_kernel_builder( - LEVEL_STEP * div_ceil(level, LEVEL_STEP)) + LEVEL_STEP * div_ceil(level, LEVEL_STEP)) # Upward pass - check if leaf boxes at higher levels need # further splitting. @@ -1003,7 +1061,8 @@ class TreeBuilder(object): level_used_box_counts[-3::-1]): upper_level_slice = slice( - upper_level_start, upper_level_start + upper_level_box_count) + upper_level_start, + upper_level_start + upper_level_box_count) have_upper_level_split_box.fill(0) wait_for.extend(have_upper_level_split_box.events) @@ -1023,8 +1082,10 @@ class TreeBuilder(object): if debug: force_split_box.finish() - boxes_split.append(int(cl.array.sum( - force_split_box[upper_level_slice]).get())) + boxes_split.append( + int( + cl.array.sum( + force_split_box[upper_level_slice]).get())) if int(have_upper_level_split_box.get()) == 0: break @@ -1033,16 +1094,20 @@ class TreeBuilder(object): if debug: total_boxes_split = sum(boxes_split) - logger.debug("level restriction: {total_boxes_split} boxes split" - .format(total_boxes_split=total_boxes_split)) + logger.debug( + "level restriction: {total_boxes_split} boxes split" + .format(total_boxes_split=total_boxes_split)) from itertools import count for level_, nboxes_split in zip( count(level - 2, step=-1), boxes_split[:-1]): logger.debug("level {level}: {nboxes_split} boxes split" - .format(level=level_, nboxes_split=nboxes_split)) + .format( + level=level_, + nboxes_split=nboxes_split)) del boxes_split - if int(have_oversize_split_box.get()) == 0 and did_upper_level_split: + if int(have_oversize_split_box.get() + ) == 0 and did_upper_level_split: # We are in the situation where there are boxes left to # split on upper levels, and the level loop is done creating # lower levels. @@ -1080,16 +1145,12 @@ class TreeBuilder(object): # empty boxes don't have box_morton_bin_counts written continue - kid_sum = sum( - h_box_srcntgt_counts_cumul[bci[ibox]] - for bci in h_box_child_ids - if bci[ibox] != 0) + kid_sum = sum(h_box_srcntgt_counts_cumul[bci[ibox]] + for bci in h_box_child_ids if bci[ibox] != 0) - if ( - h_box_srcntgt_counts_cumul[ibox] - != - h_box_morton_bin_counts[ibox]["nonchild_srcntgts"] - + kid_sum): + if (h_box_srcntgt_counts_cumul[ibox] != + h_box_morton_bin_counts[ibox]["nonchild_srcntgts"] + + kid_sum): print("MISMATCH", level, ibox) has_mismatch = True @@ -1105,10 +1166,10 @@ class TreeBuilder(object): # }}} end_time = time() - elapsed = end_time-start_time - npasses = level+1 - logger.info("elapsed time: %g s (%g s/particle/pass)" % ( - elapsed, elapsed/(npasses*nsrcntgts))) + elapsed = end_time - start_time + npasses = level + 1 + logger.info("elapsed time: %g s (%g s/particle/pass)" % + (elapsed, elapsed / (npasses * nsrcntgts))) del npasses nboxes = level_start_box_nrs[-1] @@ -1125,25 +1186,26 @@ class TreeBuilder(object): highest_possibly_split_box_nr = level_start_box_nrs[-2] evt = knl_info.extract_nonchild_srcntgt_count_kernel( - # input - box_morton_bin_counts, - box_srcntgt_counts_cumul, - highest_possibly_split_box_nr, - - # output - box_srcntgt_counts_nonchild, - - range=slice(nboxes), wait_for=wait_for) + # input + box_morton_bin_counts, + box_srcntgt_counts_cumul, + highest_possibly_split_box_nr, + + # output + box_srcntgt_counts_nonchild, + range=slice(nboxes), + wait_for=wait_for) wait_for = [evt] del highest_possibly_split_box_nr if debug: - h_box_srcntgt_counts_nonchild = box_srcntgt_counts_nonchild.get() + h_box_srcntgt_counts_nonchild = box_srcntgt_counts_nonchild.get( + ) h_box_srcntgt_counts_cumul = box_srcntgt_counts_cumul.get() - assert (h_box_srcntgt_counts_nonchild - <= h_box_srcntgt_counts_cumul[:nboxes]).all() + assert (h_box_srcntgt_counts_nonchild <= + h_box_srcntgt_counts_cumul[:nboxes]).all() del h_box_srcntgt_counts_nonchild @@ -1174,14 +1236,17 @@ class TreeBuilder(object): nboxes_post_prune_dev = empty((), dtype=box_id_dtype) evt = knl_info.find_prune_indices_kernel( - box_srcntgt_counts_cumul, - src_box_id, dst_box_id, nboxes_post_prune_dev, - size=nboxes, wait_for=wait_for) + box_srcntgt_counts_cumul, + src_box_id, + dst_box_id, + nboxes_post_prune_dev, + size=nboxes, + wait_for=wait_for) wait_for = [evt] nboxes_post_prune = int(nboxes_post_prune_dev.get()) logger.info("{} boxes after pruning " - "({} empty leaves and/or unused boxes removed)" - .format(nboxes_post_prune, nboxes - nboxes_post_prune)) + "({} empty leaves and/or unused boxes removed)".format( + nboxes_post_prune, nboxes - nboxes_post_prune)) should_prune = True elif knl_info.level_restrict: # Remove unused boxes from the tree. @@ -1194,21 +1259,27 @@ class TreeBuilder(object): for level_start, new_level_start, level_used_box_count in zip( level_start_box_nrs, new_level_start_box_nrs, level_used_box_counts): + def make_slice(start): return slice(start, start + level_used_box_count) def make_arange(start): - return cl.array.arange(queue, start, - start + level_used_box_count, dtype=box_id_dtype) - - src_box_id[make_slice(new_level_start)] = make_arange(level_start) - dst_box_id[make_slice(level_start)] = make_arange(new_level_start) + return cl.array.arange( + queue, + start, + start + level_used_box_count, + dtype=box_id_dtype) + + src_box_id[make_slice(new_level_start)] = make_arange( + level_start) + dst_box_id[make_slice(level_start)] = make_arange( + new_level_start) wait_for.extend(src_box_id.events + dst_box_id.events) nboxes_post_prune = new_level_start_box_nrs[-1] logger.info("{} boxes after pruning ({} unused boxes removed)" - .format(nboxes_post_prune, nboxes - nboxes_post_prune)) + .format(nboxes_post_prune, nboxes - nboxes_post_prune)) should_prune = True else: should_prune = False @@ -1216,25 +1287,31 @@ class TreeBuilder(object): if should_prune: prune_events = [] - prune_empty = partial(self.gappy_copy_and_map, - queue, allocator, nboxes_post_prune, - src_indices=src_box_id, - range=slice(nboxes_post_prune), debug=debug) + prune_empty = partial( + self.gappy_copy_and_map, + queue, + allocator, + nboxes_post_prune, + src_indices=src_box_id, + range=slice(nboxes_post_prune), + debug=debug) box_srcntgt_starts, evt = prune_empty(box_srcntgt_starts) prune_events.append(evt) - box_srcntgt_counts_cumul, evt = prune_empty(box_srcntgt_counts_cumul) + box_srcntgt_counts_cumul, evt = prune_empty( + box_srcntgt_counts_cumul) prune_events.append(evt) if debug and prune_empty_leaves: assert (box_srcntgt_counts_cumul.get() > 0).all() srcntgt_box_ids, evt = self.map_values_kernel( - dst_box_id, srcntgt_box_ids) + dst_box_id, srcntgt_box_ids) prune_events.append(evt) - box_parent_ids, evt = prune_empty(box_parent_ids, map_values=dst_box_id) + box_parent_ids, evt = prune_empty( + box_parent_ids, map_values=dst_box_id) prune_events.append(evt) box_levels, evt = prune_empty(box_levels) @@ -1242,19 +1319,17 @@ class TreeBuilder(object): if srcntgts_have_extent: box_srcntgt_counts_nonchild, evt = prune_empty( - box_srcntgt_counts_nonchild) + box_srcntgt_counts_nonchild) prune_events.append(evt) box_has_children, evt = prune_empty(box_has_children) prune_events.append(evt) - box_child_ids, evts = zip( - *(prune_empty(ary, map_values=dst_box_id) - for ary in box_child_ids)) + box_child_ids, evts = zip(*(prune_empty(ary, map_values=dst_box_id) + for ary in box_child_ids)) prune_events.extend(evts) - box_centers, evts = zip( - *(prune_empty(ary) for ary in box_centers)) + box_centers, evts = zip(*(prune_empty(ary) for ary in box_centers)) prune_events.extend(evts) # Update box counts and level start box indices. @@ -1302,9 +1377,13 @@ class TreeBuilder(object): source_numbers = empty(nsrcntgts, particle_id_dtype) fin_debug("source counter") - evt = knl_info.source_counter(user_srcntgt_ids, nsources, - source_numbers, queue=queue, allocator=allocator, - wait_for=wait_for) + evt = knl_info.source_counter( + user_srcntgt_ids, + nsources, + source_numbers, + queue=queue, + allocator=allocator, + wait_for=wait_for) wait_for = [evt] user_source_ids = empty(nsources, particle_id_dtype) @@ -1315,59 +1394,61 @@ class TreeBuilder(object): # need to use zeros because parent boxes won't be initialized box_source_starts, evt = zeros(nboxes_post_prune, particle_id_dtype) wait_for.append(evt) - box_source_counts_cumul, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_source_counts_cumul, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) - box_target_starts, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_target_starts, evt = zeros(nboxes_post_prune, particle_id_dtype) wait_for.append(evt) - box_target_counts_cumul, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_target_counts_cumul, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) if srcntgts_have_extent: - box_source_counts_nonchild, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_source_counts_nonchild, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) - box_target_counts_nonchild, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_target_counts_nonchild, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) fin_debug("source and target index finder") - evt = knl_info.source_and_target_index_finder(*( - # input: - ( - user_srcntgt_ids, nsources, srcntgt_box_ids, - box_parent_ids, - - box_srcntgt_starts, box_srcntgt_counts_cumul, - source_numbers, - ) - + ((box_srcntgt_counts_nonchild,) - if srcntgts_have_extent else ()) + evt = knl_info.source_and_target_index_finder( + *( + # input: + ( + user_srcntgt_ids, + nsources, + srcntgt_box_ids, + box_parent_ids, + box_srcntgt_starts, + box_srcntgt_counts_cumul, + source_numbers, + ) + ((box_srcntgt_counts_nonchild, ) + if srcntgts_have_extent else ()) - # output: - + ( - user_source_ids, srcntgt_target_ids, sorted_target_ids, - box_source_starts, box_source_counts_cumul, - box_target_starts, box_target_counts_cumul, - ) - + (( - box_source_counts_nonchild, - box_target_counts_nonchild, - ) if srcntgts_have_extent else ()) - ), - queue=queue, range=slice(nsrcntgts), + # output: + + ( + user_source_ids, + srcntgt_target_ids, + sorted_target_ids, + box_source_starts, + box_source_counts_cumul, + box_target_starts, + box_target_counts_cumul, + ) + (( + box_source_counts_nonchild, + box_target_counts_nonchild, + ) if srcntgts_have_extent else ())), + queue=queue, + range=slice(nsrcntgts), wait_for=wait_for) wait_for = [evt] if srcntgts_have_extent: if debug: - assert ( - box_srcntgt_counts_nonchild.get() - == - (box_source_counts_nonchild - + box_target_counts_nonchild).get()).all() + assert (box_srcntgt_counts_nonchild.get() == ( + box_source_counts_nonchild + + box_target_counts_nonchild).get()).all() if debug: usi_host = user_source_ids.get() @@ -1376,13 +1457,13 @@ class TreeBuilder(object): del usi_host sti_host = srcntgt_target_ids.get() - assert (sti_host < nsources+ntargets).all() + assert (sti_host < nsources + ntargets).all() assert (nsources <= sti_host).all() del sti_host - assert (box_source_counts_cumul.get() - + box_target_counts_cumul.get() - == box_srcntgt_counts_cumul.get()).all() + assert (box_source_counts_cumul.get() + + box_target_counts_cumul.get() == + box_srcntgt_counts_cumul.get()).all() del source_numbers @@ -1395,49 +1476,55 @@ class TreeBuilder(object): # {{{ permute and source/target-split (if necessary) particle array if targets is None: - sources = targets = make_obj_array([ - cl.array.empty_like(pt) for pt in srcntgts]) + sources = targets = make_obj_array( + [cl.array.empty_like(pt) for pt in srcntgts]) fin_debug("srcntgt permuter (particles)") evt = knl_info.srcntgt_permuter( - user_srcntgt_ids, - *(tuple(srcntgts) + tuple(sources)), - wait_for=wait_for) + user_srcntgt_ids, + *(tuple(srcntgts) + tuple(sources)), + wait_for=wait_for) wait_for = [evt] assert srcntgt_radii is None else: - sources = make_obj_array([ - empty(nsources, coord_dtype) for i in range(dimensions)]) + sources = make_obj_array( + [empty(nsources, coord_dtype) for i in range(dimensions)]) fin_debug("srcntgt permuter (sources)") evt = knl_info.srcntgt_permuter( - user_source_ids, - *(tuple(srcntgts) + tuple(sources)), - queue=queue, range=slice(nsources), - wait_for=wait_for) + user_source_ids, + *(tuple(srcntgts) + tuple(sources)), + queue=queue, + range=slice(nsources), + wait_for=wait_for) wait_for = [evt] - targets = make_obj_array([ - empty(ntargets, coord_dtype) for i in range(dimensions)]) + targets = make_obj_array( + [empty(ntargets, coord_dtype) for i in range(dimensions)]) fin_debug("srcntgt permuter (targets)") evt = knl_info.srcntgt_permuter( - srcntgt_target_ids, - *(tuple(srcntgts) + tuple(targets)), - queue=queue, range=slice(ntargets), - wait_for=wait_for) + srcntgt_target_ids, + *(tuple(srcntgts) + tuple(targets)), + queue=queue, + range=slice(ntargets), + wait_for=wait_for) wait_for = [evt] if srcntgt_radii is not None: fin_debug("srcntgt permuter (source radii)") source_radii = cl.array.take( - srcntgt_radii, user_source_ids, queue=queue, - wait_for=wait_for) + srcntgt_radii, + user_source_ids, + queue=queue, + wait_for=wait_for) fin_debug("srcntgt permuter (target radii)") target_radii = cl.array.take( - srcntgt_radii, srcntgt_target_ids, queue=queue, - wait_for=wait_for) + srcntgt_radii, + srcntgt_target_ids, + queue=queue, + wait_for=wait_for) wait_for = source_radii.events + target_radii.events @@ -1452,7 +1539,7 @@ class TreeBuilder(object): nlevels = len(level_start_box_nrs) - 1 assert nlevels == len(level_used_box_counts) - assert level + 1 == nlevels, (level+1, nlevels) + assert level + 1 == nlevels, (level + 1, nlevels) if debug: max_level = np.max(box_levels.get()) assert max_level + 1 == nlevels @@ -1462,9 +1549,10 @@ class TreeBuilder(object): # A number of arrays below are nominally 2-dimensional and stored with # the box index as the fastest-moving index. To make sure that accesses # remain aligned, we round up the number of boxes used for indexing. - aligned_nboxes = div_ceil(nboxes_post_prune, 32)*32 + aligned_nboxes = div_ceil(nboxes_post_prune, 32) * 32 - box_child_ids_new, evt = zeros((2**dimensions, aligned_nboxes), box_id_dtype) + box_child_ids_new, evt = zeros((2**dimensions, aligned_nboxes), + box_id_dtype) wait_for.append(evt) box_centers_new = empty((dimensions, aligned_nboxes), coord_dtype) @@ -1474,7 +1562,8 @@ class TreeBuilder(object): wait_for.extend(box_child_ids_new.events) for dim, center_row in enumerate(box_centers): - box_centers_new[dim, :nboxes_post_prune] = center_row[:nboxes_post_prune] + box_centers_new[dim, : + nboxes_post_prune] = center_row[:nboxes_post_prune] wait_for.extend(box_centers_new.events) cl.wait_for_events(wait_for) @@ -1518,33 +1607,38 @@ class TreeBuilder(object): # }}} - box_source_counts_nonchild, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_source_counts_nonchild, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) if sources_are_targets: box_target_counts_nonchild = box_source_counts_nonchild else: - box_target_counts_nonchild, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_target_counts_nonchild, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) fin_debug("compute box info") evt = knl_info.box_info_kernel( - *( - # input: - box_parent_ids, box_srcntgt_counts_cumul, - box_source_counts_cumul, box_target_counts_cumul, - box_has_children, box_levels, nlevels, - - # output if srcntgts_have_extent, input+output otherwise - box_source_counts_nonchild, box_target_counts_nonchild, + *( + # input: + box_parent_ids, + box_srcntgt_counts_cumul, + box_source_counts_cumul, + box_target_counts_cumul, + box_has_children, + box_levels, + nlevels, + + # output if srcntgts_have_extent, input+output otherwise + box_source_counts_nonchild, + box_target_counts_nonchild, - # output: - box_flags, - ), - range=slice(nboxes_post_prune), - wait_for=wait_for) + # output: + box_flags, + ), + range=slice(nboxes_post_prune), + wait_for=wait_for) # }}} @@ -1562,52 +1656,42 @@ class TreeBuilder(object): logger.info("tree build complete") return Tree( - # If you change this, also change the documentation - # of what's in the tree, above. - - sources_are_targets=sources_are_targets, - sources_have_extent=sources_have_extent, - targets_have_extent=targets_have_extent, - - particle_id_dtype=knl_info.particle_id_dtype, - box_id_dtype=knl_info.box_id_dtype, - coord_dtype=coord_dtype, - box_level_dtype=self.box_level_dtype, - - root_extent=root_extent, - stick_out_factor=stick_out_factor, - extent_norm=srcntgts_extent_norm, - - bounding_box=(bbox_min, bbox_max), - level_start_box_nrs=level_start_box_nrs, - level_start_box_nrs_dev=level_start_box_nrs_dev, - - sources=sources, - targets=targets, - - box_source_starts=box_source_starts, - box_source_counts_nonchild=box_source_counts_nonchild, - box_source_counts_cumul=box_source_counts_cumul, - box_target_starts=box_target_starts, - box_target_counts_nonchild=box_target_counts_nonchild, - box_target_counts_cumul=box_target_counts_cumul, - - box_parent_ids=box_parent_ids, - box_child_ids=box_child_ids, - box_centers=box_centers, - box_levels=box_levels, - box_flags=box_flags, - - user_source_ids=user_source_ids, - sorted_target_ids=sorted_target_ids, - - _is_pruned=prune_empty_leaves, - - **extra_tree_attrs - ).with_queue(None), evt + # If you change this, also change the documentation + # of what's in the tree, above. + sources_are_targets=sources_are_targets, + sources_have_extent=sources_have_extent, + targets_have_extent=targets_have_extent, + particle_id_dtype=knl_info.particle_id_dtype, + box_id_dtype=knl_info.box_id_dtype, + coord_dtype=coord_dtype, + box_level_dtype=self.box_level_dtype, + root_extent=root_extent, + stick_out_factor=stick_out_factor, + extent_norm=srcntgts_extent_norm, + bounding_box=(bbox_min, bbox_max), + level_start_box_nrs=level_start_box_nrs, + level_start_box_nrs_dev=level_start_box_nrs_dev, + sources=sources, + targets=targets, + box_source_starts=box_source_starts, + box_source_counts_nonchild=box_source_counts_nonchild, + box_source_counts_cumul=box_source_counts_cumul, + box_target_starts=box_target_starts, + box_target_counts_nonchild=box_target_counts_nonchild, + box_target_counts_cumul=box_target_counts_cumul, + box_parent_ids=box_parent_ids, + box_child_ids=box_child_ids, + box_centers=box_centers, + box_levels=box_levels, + box_flags=box_flags, + user_source_ids=user_source_ids, + sorted_target_ids=sorted_target_ids, + _is_pruned=prune_empty_leaves, + **extra_tree_attrs).with_queue(None), evt # }}} # }}} + # vim: foldmethod=marker:filetype=pyopencl -- GitLab From f8f2123d2e978d80b5d301204b894fd1bb107fbc Mon Sep 17 00:00:00 2001 From: xywei Date: Sat, 11 Nov 2017 09:58:13 -0600 Subject: [PATCH 02/16] Expose bounding box from tree builder --- boxtree/tree_build.py | 826 +++++++++++++++++++++++------------------- 1 file changed, 455 insertions(+), 371 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 2566dc4..afa9c31 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -22,7 +22,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ - from six.moves import range, zip import numpy as np @@ -59,27 +58,42 @@ class TreeBuilder(object): ROOT_EXTENT_STRETCH_FACTOR = 1e-4 @memoize_method - def get_kernel_info(self, dimensions, coord_dtype, - particle_id_dtype, box_id_dtype, - sources_are_targets, srcntgts_extent_norm, - kind): + def get_kernel_info(self, dimensions, coord_dtype, particle_id_dtype, + box_id_dtype, sources_are_targets, srcntgts_extent_norm, + kind): from boxtree.tree_build_kernels import get_tree_build_kernel_info - return get_tree_build_kernel_info(self.context, dimensions, coord_dtype, - particle_id_dtype, box_id_dtype, - sources_are_targets, srcntgts_extent_norm, - self.morton_nr_dtype, self.box_level_dtype, + return get_tree_build_kernel_info( + self.context, + dimensions, + coord_dtype, + particle_id_dtype, + box_id_dtype, + sources_are_targets, + srcntgts_extent_norm, + self.morton_nr_dtype, + self.box_level_dtype, kind=kind) # {{{ run control - def __call__(self, queue, particles, kind="adaptive", - max_particles_in_box=None, allocator=None, debug=False, - targets=None, source_radii=None, target_radii=None, - stick_out_factor=None, refine_weights=None, - max_leaf_refine_weight=None, wait_for=None, - extent_norm=None, - **kwargs): + def __call__(self, + queue, + particles, + kind="adaptive", + max_particles_in_box=None, + allocator=None, + debug=False, + targets=None, + source_radii=None, + target_radii=None, + stick_out_factor=None, + refine_weights=None, + max_leaf_refine_weight=None, + wait_for=None, + extent_norm=None, + bbox=None, + **kwargs): """ :arg queue: a :class:`pyopencl.CommandQueue` instance :arg particles: an object array of (XYZ) point coordinate arrays. @@ -118,6 +132,8 @@ class TreeBuilder(object): execution. :arg extent_norm: ``"l2"`` or ``"linf"``. Indicates the norm with respect to which particle stick-out is measured. See :attr:`Tree.extent_norm`. + :arg bbox: When specified, the bounding box is used for tree + building. Otherwise, the bounding box is determined from particles. :arg kwargs: Used internally for debugging. :returns: a tuple ``(tree, event)``, where *tree* is an instance of @@ -127,7 +143,9 @@ class TreeBuilder(object): # {{{ input processing - if kind not in ["adaptive", "adaptive-level-restricted", "non-adaptive"]: + if kind not in [ + "adaptive", "adaptive-level-restricted", "non-adaptive" + ]: raise ValueError("unknown tree kind \"{0}\"".format(kind)) # we'll modify this below, so copy it @@ -149,8 +167,8 @@ class TreeBuilder(object): extent_norm = "linf" if extent_norm not in ["linf", "l2"]: - raise ValueError("unexpected value of 'extent_norm': %s" - % extent_norm) + raise ValueError( + "unexpected value of 'extent_norm': %s" % extent_norm) srcntgts_extent_norm = extent_norm srcntgts_have_extent = sources_have_extent or targets_have_extent @@ -161,7 +179,7 @@ class TreeBuilder(object): if srcntgts_extent_norm and targets is None: raise ValueError("must specify targets when specifying " - "any kind of radii") + "any kind of radii") from pytools import single_valued particle_id_dtype = np.int32 @@ -176,25 +194,26 @@ class TreeBuilder(object): nsrcntgts = nsources + ntargets if source_radii is not None: - if source_radii.shape != (nsources,): + if source_radii.shape != (nsources, ): raise ValueError("source_radii has an invalid shape") if source_radii.dtype != coord_dtype: raise TypeError("dtypes of coordinate arrays and " - "source_radii must agree") + "source_radii must agree") if target_radii is not None: - if target_radii.shape != (ntargets,): + if target_radii.shape != (ntargets, ): raise ValueError("target_radii has an invalid shape") if target_radii.dtype != coord_dtype: raise TypeError("dtypes of coordinate arrays and " - "target_radii must agree") + "target_radii must agree") if sources_have_extent or targets_have_extent: if stick_out_factor is None: - raise ValueError("if sources or targets have extent, " - "stick_out_factor must be explicitly specified") + raise ValueError( + "if sources or targets have extent, " + "stick_out_factor must be explicitly specified") else: stick_out_factor = 0 @@ -207,10 +226,14 @@ class TreeBuilder(object): event, = result.events return result, event - knl_info = self.get_kernel_info(dimensions, coord_dtype, - particle_id_dtype, box_id_dtype, - sources_are_targets, srcntgts_extent_norm, - kind=kind) + knl_info = self.get_kernel_info( + dimensions, + coord_dtype, + particle_id_dtype, + box_id_dtype, + sources_are_targets, + srcntgts_extent_norm, + kind=kind) logger.info("tree build: start") @@ -240,7 +263,7 @@ class TreeBuilder(object): if target_coord_dtype != coord_dtype: raise TypeError("sources and targets must have same coordinate " - "dtype") + "dtype") def combine_srcntgt_arrays(ary1, ary2=None): if ary2 is None: @@ -264,10 +287,11 @@ class TreeBuilder(object): srcntgts = make_obj_array([ combine_srcntgt_arrays(src_i, tgt_i) for src_i, tgt_i in zip(particles, targets) - ]) + ]) if srcntgts_have_extent: - srcntgt_radii = combine_srcntgt_arrays(source_radii, target_radii) + srcntgt_radii = combine_srcntgt_arrays(source_radii, + target_radii) else: srcntgt_radii = None @@ -276,8 +300,8 @@ class TreeBuilder(object): del particles - user_srcntgt_ids = cl.array.arange(queue, nsrcntgts, dtype=particle_id_dtype, - allocator=allocator) + user_srcntgt_ids = cl.array.arange( + queue, nsrcntgts, dtype=particle_id_dtype, allocator=allocator) evt, = user_srcntgt_ids.events wait_for.append(evt) @@ -295,33 +319,34 @@ class TreeBuilder(object): if specified_max_particles_in_box and specified_refine_weights: raise ValueError("may only specify one of max_particles_in_box and " - "refine_weights/max_leaf_refine_weight") + "refine_weights/max_leaf_refine_weight") elif not specified_max_particles_in_box and not specified_refine_weights: raise ValueError("must specify either max_particles_in_box or " - "refine_weights/max_leaf_refine_weight") + "refine_weights/max_leaf_refine_weight") elif specified_max_particles_in_box: - refine_weights = ( - cl.array.empty( - queue, nsrcntgts, refine_weight_dtype, allocator=allocator) - .fill(1)) + refine_weights = (cl.array.empty( + queue, nsrcntgts, refine_weight_dtype, allocator=allocator) + .fill(1)) event, = refine_weights.events prep_events.append(event) max_leaf_refine_weight = max_particles_in_box elif specified_refine_weights: if refine_weights.dtype != refine_weight_dtype: - raise TypeError("refine_weights must have dtype '%s'" - % refine_weight_dtype) + raise TypeError( + "refine_weights must have dtype '%s'" % refine_weight_dtype) if max_leaf_refine_weight < cl.array.max(refine_weights).get(): raise ValueError( - "entries of refine_weights cannot exceed max_leaf_refine_weight") + "entries of refine_weights cannot exceed max_leaf_refine_weight" + ) if 0 > cl.array.min(refine_weights).get(): - raise ValueError("all entries of refine_weights must be nonnegative") + raise ValueError( + "all entries of refine_weights must be nonnegative") if max_leaf_refine_weight <= 0: raise ValueError("max_leaf_refine_weight must be positive") total_refine_weight = cl.array.sum( - refine_weights, dtype=np.dtype(np.int64)).get() + refine_weights, dtype=np.dtype(np.int64)).get() del max_particles_in_box del specified_max_particles_in_box @@ -331,22 +356,35 @@ class TreeBuilder(object): # {{{ find and process bounding box - bbox, _ = self.bbox_finder(srcntgts, srcntgt_radii, wait_for=wait_for) - bbox = bbox.get() + if bbox is None: + bbox, _ = self.bbox_finder( + srcntgts, srcntgt_radii, wait_for=wait_for) + bbox = bbox.get() - root_extent = max( - bbox["max_"+ax] - bbox["min_"+ax] - for ax in axis_names) * (1+TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) + root_extent = max( + bbox["max_" + ax] - bbox["min_" + ax] for ax in axis_names) * ( + 1 + TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) - # make bbox square and slightly larger at the top, to ensure scaled - # coordinates are always < 1 - bbox_min = np.empty(dimensions, coord_dtype) - for i, ax in enumerate(axis_names): - bbox_min[i] = bbox["min_"+ax] + # make bbox square and slightly larger at the top, to ensure scaled + # coordinates are always < 1 + bbox_min = np.empty(dimensions, coord_dtype) + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_" + ax] - bbox_max = bbox_min + root_extent - for i, ax in enumerate(axis_names): - bbox["max_"+ax] = bbox_max[i] + bbox_max = bbox_min + root_extent + for i, ax in enumerate(axis_names): + bbox["max_" + ax] = bbox_max[i] + else: + # all coordinates must be at the interior (boundary excluded) + # of the bbox + # Currently only cubic domain has ensured mesh reconstruction. + bbox_min = np.empty(dimensions, coord_dtype) + bbox_max = np.empty(dimensions, coord_dtype) + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_" + ax] + bbox_max[i] = bbox["max_" + ax] + assert (bbox_min[i] < bbox_max[i]) + root_extent = max(bbox_max - bbox_min) # }}} @@ -356,7 +394,8 @@ class TreeBuilder(object): # box-local morton bin counts for each particle at the current level # only valid from scan -> split'n'sort - morton_bin_counts = empty(nsrcntgts, dtype=knl_info.morton_bin_count_dtype) + morton_bin_counts = empty( + nsrcntgts, dtype=knl_info.morton_bin_count_dtype) # (local) morton nrs for each particle at the current level # only valid from scan -> split'n'sort @@ -376,8 +415,8 @@ class TreeBuilder(object): nboxes_guess = kwargs.get("nboxes_guess") if nboxes_guess is None: nboxes_guess = 2**dimensions * ( - (max_leaf_refine_weight + total_refine_weight - 1) - // max_leaf_refine_weight) + (max_leaf_refine_weight + total_refine_weight - 1 + ) // max_leaf_refine_weight) assert nboxes_guess > 0 @@ -398,8 +437,8 @@ class TreeBuilder(object): prep_events.append(evt) # per-box morton bin counts - box_morton_bin_counts, evt = zeros(nboxes_guess, - dtype=knl_info.morton_bin_count_dtype) + box_morton_bin_counts, evt = zeros( + nboxes_guess, dtype=knl_info.morton_bin_count_dtype) prep_events.append(evt) # particle# at which each box starts @@ -411,18 +450,19 @@ class TreeBuilder(object): prep_events.append(evt) # pointer to child box, by morton number - box_child_ids, evts = zip( - *(zeros(nboxes_guess, dtype=box_id_dtype) for d in range(2**dimensions))) + box_child_ids, evts = zip(*(zeros(nboxes_guess, dtype=box_id_dtype) + for d in range(2**dimensions))) prep_events.extend(evts) # box centers, by dimension - box_centers, evts = zip( - *(zeros(nboxes_guess, dtype=coord_dtype) for d in range(dimensions))) + box_centers, evts = zip(*(zeros(nboxes_guess, dtype=coord_dtype) + for d in range(dimensions))) prep_events.extend(evts) # Initialize box_centers[0] to contain the root box's center for d, (ax, evt) in enumerate(zip(axis_names, evts)): - center_ax = bbox["min_"+ax] + (bbox["max_"+ax] - bbox["min_"+ax]) / 2 + center_ax = bbox["min_" + + ax] + (bbox["max_" + ax] - bbox["min_" + ax]) / 2 box_centers[d][0].fill(center_ax, wait_for=[evt]) # box -> level map @@ -431,12 +471,12 @@ class TreeBuilder(object): # number of particles in each box # needs to be globally initialized because empty boxes never get touched - box_srcntgt_counts_cumul, evt = zeros(nboxes_guess, dtype=particle_id_dtype) + box_srcntgt_counts_cumul, evt = zeros( + nboxes_guess, dtype=particle_id_dtype) prep_events.append(evt) # Initalize box 0 to contain all particles - box_srcntgt_counts_cumul[0].fill( - nsrcntgts, queue=queue, wait_for=[evt]) + box_srcntgt_counts_cumul[0].fill(nsrcntgts, queue=queue, wait_for=[evt]) # box -> whether the box has a child. FIXME: use smaller integer type box_has_children, evt = zeros(nboxes_guess, dtype=np.dtype(np.int32)) @@ -444,14 +484,14 @@ class TreeBuilder(object): # box -> whether the box needs a splitting to enforce level restriction. # FIXME: use smaller integer type - force_split_box, evt = zeros(nboxes_guess - if knl_info.level_restrict - else 0, dtype=np.dtype(np.int32)) + force_split_box, evt = zeros( + nboxes_guess if knl_info.level_restrict else 0, + dtype=np.dtype(np.int32)) prep_events.append(evt) # set parent of root box to itself - evt = cl.enqueue_copy( - queue, box_parent_ids.data, np.zeros((), dtype=box_parent_ids.dtype)) + evt = cl.enqueue_copy(queue, box_parent_ids.data, + np.zeros((), dtype=box_parent_ids.dtype)) prep_events.append(evt) nlevels_max = np.iinfo(self.box_level_dtype).max @@ -543,55 +583,50 @@ class TreeBuilder(object): if level > np.iinfo(self.box_level_dtype).max: raise RuntimeError("level count exceeded maximum") - common_args = ((morton_bin_counts, morton_nrs, - box_start_flags, - srcntgt_box_ids, split_box_ids, - box_morton_bin_counts, - refine_weights, - max_leaf_refine_weight, - box_srcntgt_starts, box_srcntgt_counts_cumul, - box_parent_ids, box_levels, - level, bbox, - user_srcntgt_ids) - + tuple(srcntgts) - + ((srcntgt_radii,) if srcntgts_have_extent else ()) - ) + common_args = ( + (morton_bin_counts, morton_nrs, box_start_flags, + srcntgt_box_ids, split_box_ids, box_morton_bin_counts, + refine_weights, max_leaf_refine_weight, box_srcntgt_starts, + box_srcntgt_counts_cumul, box_parent_ids, box_levels, level, + bbox, user_srcntgt_ids) + tuple(srcntgts) + + ((srcntgt_radii, ) if srcntgts_have_extent else ())) fin_debug("morton count scan") morton_count_args = common_args if srcntgts_have_extent: - morton_count_args += (stick_out_factor,) + morton_count_args += (stick_out_factor, ) # writes: box_morton_bin_counts evt = knl_info.morton_count_scan( - *morton_count_args, queue=queue, size=nsrcntgts, - wait_for=wait_for) + *morton_count_args, + queue=queue, + size=nsrcntgts, + wait_for=wait_for) wait_for = [evt] fin_debug("split box id scan") # writes: box_has_children, split_box_ids evt = knl_info.split_box_id_scan( - srcntgt_box_ids, - box_srcntgt_counts_cumul, - box_morton_bin_counts, - refine_weights, - max_leaf_refine_weight, - box_levels, - level_start_box_nrs_dev, - level_used_box_counts_dev, - force_split_box, - level, + srcntgt_box_ids, + box_srcntgt_counts_cumul, + box_morton_bin_counts, + refine_weights, + max_leaf_refine_weight, + box_levels, + level_start_box_nrs_dev, + level_used_box_counts_dev, + force_split_box, + level, - # output: - box_has_children, - split_box_ids, - have_oversize_split_box, - - queue=queue, - size=level_start_box_nrs[level], - wait_for=wait_for) + # output: + box_has_children, + split_box_ids, + have_oversize_split_box, + queue=queue, + size=level_start_box_nrs[level], + wait_for=wait_for) wait_for = [evt] # {{{ compute new level_used_box_counts, level_leaf_counts @@ -603,21 +638,20 @@ class TreeBuilder(object): last_box_on_prev_level = level_start_box_id - 1 new_level_used_box_counts.append( # FIXME: Get this all at once. - int(split_box_ids[last_box_on_prev_level].get()) - - level_start_box_id) + int(split_box_ids[last_box_on_prev_level].get()) - + level_start_box_id) # New leaf count = # old leaf count # + nr. new boxes from splitting parent's leaves # - nr. new boxes from splitting current level's leaves / 2**d - level_used_box_counts_diff = (new_level_used_box_counts - - np.append(level_used_box_counts, [0])) - new_level_leaf_counts = (level_leaf_counts - + level_used_box_counts_diff[:-1] - - level_used_box_counts_diff[1:] // 2 ** dimensions) - new_level_leaf_counts = np.append( - new_level_leaf_counts, - [level_used_box_counts_diff[-1]]) + level_used_box_counts_diff = (new_level_used_box_counts - + np.append(level_used_box_counts, [0])) + new_level_leaf_counts = ( + level_leaf_counts + level_used_box_counts_diff[:-1] - + level_used_box_counts_diff[1:] // 2**dimensions) + new_level_leaf_counts = np.append(new_level_leaf_counts, + [level_used_box_counts_diff[-1]]) del level_used_box_counts_diff # }}} @@ -638,7 +672,8 @@ class TreeBuilder(object): curr_upper_level_lengths = np.diff(level_start_box_nrs) minimal_upper_level_lengths = np.max( - [new_level_used_box_counts[:-1], curr_upper_level_lengths], axis=0) + [new_level_used_box_counts[:-1], curr_upper_level_lengths], + axis=0) minimal_new_level_length = new_level_used_box_counts[-1] # Allocate extra space at the end of the current level for higher @@ -652,7 +687,7 @@ class TreeBuilder(object): # Currently undocumented. lr_lookbehind_levels = kwargs.get("lr_lookbehind", 1) minimal_new_level_length += sum( - 2**(l*dimensions) * new_level_leaf_counts[level - l] + 2**(l * dimensions) * new_level_leaf_counts[level - l] for l in range(1, 1 + min(level, lr_lookbehind_levels))) nboxes_minimal = \ @@ -674,22 +709,25 @@ class TreeBuilder(object): # Recompute the level padding. for ulevel in range(level): upper_level_padding[ulevel] = sum( - 2**(l*dimensions) * new_level_leaf_counts[ulevel - l] - for l in range( - 1, 1 + min(ulevel, lr_lookbehind_levels))) + 2**(l * dimensions) * new_level_leaf_counts[ulevel - l] + for l in range(1, + 1 + min(ulevel, lr_lookbehind_levels))) new_upper_level_unused_box_counts = np.max( - [upper_level_padding, - minimal_upper_level_lengths - new_level_used_box_counts[:-1]], + [ + upper_level_padding, minimal_upper_level_lengths - + new_level_used_box_counts[:-1] + ], axis=0) new_level_start_box_nrs = np.empty(level + 1, dtype=int) new_level_start_box_nrs[0] = 0 - new_level_start_box_nrs[1:] = np.cumsum( - new_level_used_box_counts[:-1] - + new_upper_level_unused_box_counts) + new_level_start_box_nrs[ + 1:] = np.cumsum(new_level_used_box_counts[:-1] + + new_upper_level_unused_box_counts) - assert not (level_start_box_nrs == new_level_start_box_nrs).all() + assert not ( + level_start_box_nrs == new_level_start_box_nrs).all() # }}} @@ -697,8 +735,8 @@ class TreeBuilder(object): old_box_count = level_start_box_nrs[-1] # Where should I put this box? - dst_box_id = cl.array.empty(queue, - shape=old_box_count, dtype=box_id_dtype) + dst_box_id = cl.array.empty( + queue, shape=old_box_count, dtype=box_id_dtype) for level_start, new_level_start, level_len in zip( level_start_box_nrs, new_level_start_box_nrs, @@ -711,12 +749,17 @@ class TreeBuilder(object): wait_for.extend(dst_box_id.events) - realloc_array = partial(self.gappy_copy_and_map, - dst_indices=dst_box_id, range=slice(old_box_count), - debug=debug) - realloc_and_renumber_array = partial(self.gappy_copy_and_map, - dst_indices=dst_box_id, map_values=dst_box_id, - range=slice(old_box_count), debug=debug) + realloc_array = partial( + self.gappy_copy_and_map, + dst_indices=dst_box_id, + range=slice(old_box_count), + debug=debug) + realloc_and_renumber_array = partial( + self.gappy_copy_and_map, + dst_indices=dst_box_id, + map_values=dst_box_id, + range=slice(old_box_count), + debug=debug) renumber_array = partial(self.map_values_kernel, dst_box_id) # }}} @@ -753,22 +796,40 @@ class TreeBuilder(object): nboxes_guess *= 2 def my_realloc_nocopy(ary): - return cl.array.empty(queue, allocator=allocator, - shape=nboxes_guess, dtype=ary.dtype) + return cl.array.empty( + queue, + allocator=allocator, + shape=nboxes_guess, + dtype=ary.dtype) def my_realloc_zeros_nocopy(ary): - result = cl.array.zeros(queue, allocator=allocator, - shape=nboxes_guess, dtype=ary.dtype) + result = cl.array.zeros( + queue, + allocator=allocator, + shape=nboxes_guess, + dtype=ary.dtype) return result, result.events[0] - my_realloc = partial(realloc_array, - queue, allocator, nboxes_guess, wait_for=wait_for) - my_realloc_zeros = partial(realloc_array, - queue, allocator, nboxes_guess, zero_fill=True, - wait_for=wait_for) - my_realloc_zeros_and_renumber = partial(realloc_and_renumber_array, - queue, allocator, nboxes_guess, zero_fill=True, - wait_for=wait_for) + my_realloc = partial( + realloc_array, + queue, + allocator, + nboxes_guess, + wait_for=wait_for) + my_realloc_zeros = partial( + realloc_array, + queue, + allocator, + nboxes_guess, + zero_fill=True, + wait_for=wait_for) + my_realloc_zeros_and_renumber = partial( + realloc_and_renumber_array, + queue, + allocator, + nboxes_guess, + zero_fill=True, + wait_for=wait_for) resize_events = [] @@ -780,7 +841,7 @@ class TreeBuilder(object): # currently being processed are written-but we need to # retain the box morton bin counts from the higher levels. box_morton_bin_counts, evt = my_realloc_zeros( - box_morton_bin_counts) + box_morton_bin_counts) resize_events.append(evt) # force_split_box is unused unless level restriction is enabled. @@ -798,16 +859,16 @@ class TreeBuilder(object): box_has_children, evt = my_realloc_zeros(box_has_children) resize_events.append(evt) - box_centers, evts = zip( - *(my_realloc(ary) for ary in box_centers)) + box_centers, evts = zip(*(my_realloc(ary) + for ary in box_centers)) resize_events.extend(evts) - box_child_ids, evts = zip( - *(my_realloc_zeros_and_renumber(ary) - for ary in box_child_ids)) + box_child_ids, evts = zip(*(my_realloc_zeros_and_renumber(ary) + for ary in box_child_ids)) resize_events.extend(evts) - box_parent_ids, evt = my_realloc_zeros_and_renumber(box_parent_ids) + box_parent_ids, evt = my_realloc_zeros_and_renumber( + box_parent_ids) resize_events.append(evt) if not level_start_box_nrs_updated: @@ -816,8 +877,8 @@ class TreeBuilder(object): else: box_levels, evt = my_realloc_zeros_nocopy(box_levels) cl.wait_for_events([evt]) - for box_level, (level_start, level_end) in enumerate(zip( - level_start_box_nrs, level_start_box_nrs[1:])): + for box_level, (level_start, level_end) in enumerate( + zip(level_start_box_nrs, level_start_box_nrs[1:])): box_levels[level_start:level_end].fill(box_level) resize_events.extend(box_levels.events) @@ -845,10 +906,8 @@ class TreeBuilder(object): logger.info("LEVEL %d -> %d boxes" % (level, nboxes_new)) - assert ( - level_start_box_nrs[-1] != nboxes_new or - srcntgts_have_extent or - final_level_restrict_iteration) + assert (level_start_box_nrs[-1] != nboxes_new + or srcntgts_have_extent or final_level_restrict_iteration) if level_start_box_nrs[-1] == nboxes_new: # We haven't created new boxes in this level loop trip. @@ -875,15 +934,14 @@ class TreeBuilder(object): level_leaf_counts = new_level_leaf_counts if debug: for level_start, level_nboxes, leaf_count in zip( - level_start_box_nrs, - level_used_box_counts, + level_start_box_nrs, level_used_box_counts, level_leaf_counts): if level_nboxes == 0: assert leaf_count == 0 continue nleaves_actual = level_nboxes - int( - cl.array.sum(box_has_children[ - level_start:level_start + level_nboxes]).get()) + cl.array.sum(box_has_children[level_start:level_start + + level_nboxes]).get()) assert leaf_count == nleaves_actual # Can't del in Py2.7 - see note below @@ -896,15 +954,14 @@ class TreeBuilder(object): # {{{ split boxes - box_splitter_args = ( - common_args - + (box_has_children, force_split_box, root_extent) - + box_child_ids - + box_centers) + box_splitter_args = (common_args + + (box_has_children, force_split_box, + root_extent) + box_child_ids + box_centers) - evt = knl_info.box_splitter_kernel(*box_splitter_args, - range=slice(level_start_box_nrs[-1]), - wait_for=wait_for) + evt = knl_info.box_splitter_kernel( + *box_splitter_args, + range=slice(level_start_box_nrs[-1]), + wait_for=wait_for) wait_for = [evt] @@ -919,8 +976,8 @@ class TreeBuilder(object): if debug: box_levels.finish() - level_bl_chunk = box_levels.get()[ - level_start_box_nrs[-2]:level_start_box_nrs[-1]] + level_bl_chunk = box_levels.get()[level_start_box_nrs[-2]: + level_start_box_nrs[-1]] assert (level_bl_chunk == level).all() del level_bl_chunk @@ -935,12 +992,13 @@ class TreeBuilder(object): new_srcntgt_box_ids = cl.array.empty_like(srcntgt_box_ids) particle_renumberer_args = ( - common_args - + (box_has_children, force_split_box, - new_user_srcntgt_ids, new_srcntgt_box_ids)) + common_args + (box_has_children, force_split_box, + new_user_srcntgt_ids, new_srcntgt_box_ids)) - evt = knl_info.particle_renumberer_kernel(*particle_renumberer_args, - range=slice(nsrcntgts), wait_for=wait_for) + evt = knl_info.particle_renumberer_kernel( + *particle_renumberer_args, + range=slice(nsrcntgts), + wait_for=wait_for) wait_for = [evt] @@ -978,7 +1036,7 @@ class TreeBuilder(object): LEVEL_STEP = 10 # noqa if level % LEVEL_STEP == 1: level_restrict_kernel = knl_info.level_restrict_kernel_builder( - LEVEL_STEP * div_ceil(level, LEVEL_STEP)) + LEVEL_STEP * div_ceil(level, LEVEL_STEP)) # Upward pass - check if leaf boxes at higher levels need # further splitting. @@ -1003,7 +1061,8 @@ class TreeBuilder(object): level_used_box_counts[-3::-1]): upper_level_slice = slice( - upper_level_start, upper_level_start + upper_level_box_count) + upper_level_start, + upper_level_start + upper_level_box_count) have_upper_level_split_box.fill(0) wait_for.extend(have_upper_level_split_box.events) @@ -1023,8 +1082,10 @@ class TreeBuilder(object): if debug: force_split_box.finish() - boxes_split.append(int(cl.array.sum( - force_split_box[upper_level_slice]).get())) + boxes_split.append( + int( + cl.array.sum( + force_split_box[upper_level_slice]).get())) if int(have_upper_level_split_box.get()) == 0: break @@ -1033,16 +1094,20 @@ class TreeBuilder(object): if debug: total_boxes_split = sum(boxes_split) - logger.debug("level restriction: {total_boxes_split} boxes split" - .format(total_boxes_split=total_boxes_split)) + logger.debug( + "level restriction: {total_boxes_split} boxes split" + .format(total_boxes_split=total_boxes_split)) from itertools import count for level_, nboxes_split in zip( count(level - 2, step=-1), boxes_split[:-1]): logger.debug("level {level}: {nboxes_split} boxes split" - .format(level=level_, nboxes_split=nboxes_split)) + .format( + level=level_, + nboxes_split=nboxes_split)) del boxes_split - if int(have_oversize_split_box.get()) == 0 and did_upper_level_split: + if int(have_oversize_split_box.get() + ) == 0 and did_upper_level_split: # We are in the situation where there are boxes left to # split on upper levels, and the level loop is done creating # lower levels. @@ -1080,16 +1145,12 @@ class TreeBuilder(object): # empty boxes don't have box_morton_bin_counts written continue - kid_sum = sum( - h_box_srcntgt_counts_cumul[bci[ibox]] - for bci in h_box_child_ids - if bci[ibox] != 0) + kid_sum = sum(h_box_srcntgt_counts_cumul[bci[ibox]] + for bci in h_box_child_ids if bci[ibox] != 0) - if ( - h_box_srcntgt_counts_cumul[ibox] - != - h_box_morton_bin_counts[ibox]["nonchild_srcntgts"] - + kid_sum): + if (h_box_srcntgt_counts_cumul[ibox] != + h_box_morton_bin_counts[ibox]["nonchild_srcntgts"] + + kid_sum): print("MISMATCH", level, ibox) has_mismatch = True @@ -1105,10 +1166,10 @@ class TreeBuilder(object): # }}} end_time = time() - elapsed = end_time-start_time - npasses = level+1 - logger.info("elapsed time: %g s (%g s/particle/pass)" % ( - elapsed, elapsed/(npasses*nsrcntgts))) + elapsed = end_time - start_time + npasses = level + 1 + logger.info("elapsed time: %g s (%g s/particle/pass)" % + (elapsed, elapsed / (npasses * nsrcntgts))) del npasses nboxes = level_start_box_nrs[-1] @@ -1125,25 +1186,26 @@ class TreeBuilder(object): highest_possibly_split_box_nr = level_start_box_nrs[-2] evt = knl_info.extract_nonchild_srcntgt_count_kernel( - # input - box_morton_bin_counts, - box_srcntgt_counts_cumul, - highest_possibly_split_box_nr, - - # output - box_srcntgt_counts_nonchild, - - range=slice(nboxes), wait_for=wait_for) + # input + box_morton_bin_counts, + box_srcntgt_counts_cumul, + highest_possibly_split_box_nr, + + # output + box_srcntgt_counts_nonchild, + range=slice(nboxes), + wait_for=wait_for) wait_for = [evt] del highest_possibly_split_box_nr if debug: - h_box_srcntgt_counts_nonchild = box_srcntgt_counts_nonchild.get() + h_box_srcntgt_counts_nonchild = box_srcntgt_counts_nonchild.get( + ) h_box_srcntgt_counts_cumul = box_srcntgt_counts_cumul.get() - assert (h_box_srcntgt_counts_nonchild - <= h_box_srcntgt_counts_cumul[:nboxes]).all() + assert (h_box_srcntgt_counts_nonchild <= + h_box_srcntgt_counts_cumul[:nboxes]).all() del h_box_srcntgt_counts_nonchild @@ -1174,14 +1236,17 @@ class TreeBuilder(object): nboxes_post_prune_dev = empty((), dtype=box_id_dtype) evt = knl_info.find_prune_indices_kernel( - box_srcntgt_counts_cumul, - src_box_id, dst_box_id, nboxes_post_prune_dev, - size=nboxes, wait_for=wait_for) + box_srcntgt_counts_cumul, + src_box_id, + dst_box_id, + nboxes_post_prune_dev, + size=nboxes, + wait_for=wait_for) wait_for = [evt] nboxes_post_prune = int(nboxes_post_prune_dev.get()) logger.info("{} boxes after pruning " - "({} empty leaves and/or unused boxes removed)" - .format(nboxes_post_prune, nboxes - nboxes_post_prune)) + "({} empty leaves and/or unused boxes removed)".format( + nboxes_post_prune, nboxes - nboxes_post_prune)) should_prune = True elif knl_info.level_restrict: # Remove unused boxes from the tree. @@ -1194,21 +1259,27 @@ class TreeBuilder(object): for level_start, new_level_start, level_used_box_count in zip( level_start_box_nrs, new_level_start_box_nrs, level_used_box_counts): + def make_slice(start): return slice(start, start + level_used_box_count) def make_arange(start): - return cl.array.arange(queue, start, - start + level_used_box_count, dtype=box_id_dtype) - - src_box_id[make_slice(new_level_start)] = make_arange(level_start) - dst_box_id[make_slice(level_start)] = make_arange(new_level_start) + return cl.array.arange( + queue, + start, + start + level_used_box_count, + dtype=box_id_dtype) + + src_box_id[make_slice(new_level_start)] = make_arange( + level_start) + dst_box_id[make_slice(level_start)] = make_arange( + new_level_start) wait_for.extend(src_box_id.events + dst_box_id.events) nboxes_post_prune = new_level_start_box_nrs[-1] logger.info("{} boxes after pruning ({} unused boxes removed)" - .format(nboxes_post_prune, nboxes - nboxes_post_prune)) + .format(nboxes_post_prune, nboxes - nboxes_post_prune)) should_prune = True else: should_prune = False @@ -1216,25 +1287,31 @@ class TreeBuilder(object): if should_prune: prune_events = [] - prune_empty = partial(self.gappy_copy_and_map, - queue, allocator, nboxes_post_prune, - src_indices=src_box_id, - range=slice(nboxes_post_prune), debug=debug) + prune_empty = partial( + self.gappy_copy_and_map, + queue, + allocator, + nboxes_post_prune, + src_indices=src_box_id, + range=slice(nboxes_post_prune), + debug=debug) box_srcntgt_starts, evt = prune_empty(box_srcntgt_starts) prune_events.append(evt) - box_srcntgt_counts_cumul, evt = prune_empty(box_srcntgt_counts_cumul) + box_srcntgt_counts_cumul, evt = prune_empty( + box_srcntgt_counts_cumul) prune_events.append(evt) if debug and prune_empty_leaves: assert (box_srcntgt_counts_cumul.get() > 0).all() srcntgt_box_ids, evt = self.map_values_kernel( - dst_box_id, srcntgt_box_ids) + dst_box_id, srcntgt_box_ids) prune_events.append(evt) - box_parent_ids, evt = prune_empty(box_parent_ids, map_values=dst_box_id) + box_parent_ids, evt = prune_empty( + box_parent_ids, map_values=dst_box_id) prune_events.append(evt) box_levels, evt = prune_empty(box_levels) @@ -1242,19 +1319,17 @@ class TreeBuilder(object): if srcntgts_have_extent: box_srcntgt_counts_nonchild, evt = prune_empty( - box_srcntgt_counts_nonchild) + box_srcntgt_counts_nonchild) prune_events.append(evt) box_has_children, evt = prune_empty(box_has_children) prune_events.append(evt) - box_child_ids, evts = zip( - *(prune_empty(ary, map_values=dst_box_id) - for ary in box_child_ids)) + box_child_ids, evts = zip(*(prune_empty(ary, map_values=dst_box_id) + for ary in box_child_ids)) prune_events.extend(evts) - box_centers, evts = zip( - *(prune_empty(ary) for ary in box_centers)) + box_centers, evts = zip(*(prune_empty(ary) for ary in box_centers)) prune_events.extend(evts) # Update box counts and level start box indices. @@ -1302,9 +1377,13 @@ class TreeBuilder(object): source_numbers = empty(nsrcntgts, particle_id_dtype) fin_debug("source counter") - evt = knl_info.source_counter(user_srcntgt_ids, nsources, - source_numbers, queue=queue, allocator=allocator, - wait_for=wait_for) + evt = knl_info.source_counter( + user_srcntgt_ids, + nsources, + source_numbers, + queue=queue, + allocator=allocator, + wait_for=wait_for) wait_for = [evt] user_source_ids = empty(nsources, particle_id_dtype) @@ -1315,59 +1394,61 @@ class TreeBuilder(object): # need to use zeros because parent boxes won't be initialized box_source_starts, evt = zeros(nboxes_post_prune, particle_id_dtype) wait_for.append(evt) - box_source_counts_cumul, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_source_counts_cumul, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) - box_target_starts, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_target_starts, evt = zeros(nboxes_post_prune, particle_id_dtype) wait_for.append(evt) - box_target_counts_cumul, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_target_counts_cumul, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) if srcntgts_have_extent: - box_source_counts_nonchild, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_source_counts_nonchild, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) - box_target_counts_nonchild, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_target_counts_nonchild, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) fin_debug("source and target index finder") - evt = knl_info.source_and_target_index_finder(*( - # input: - ( - user_srcntgt_ids, nsources, srcntgt_box_ids, - box_parent_ids, - - box_srcntgt_starts, box_srcntgt_counts_cumul, - source_numbers, - ) - + ((box_srcntgt_counts_nonchild,) - if srcntgts_have_extent else ()) + evt = knl_info.source_and_target_index_finder( + *( + # input: + ( + user_srcntgt_ids, + nsources, + srcntgt_box_ids, + box_parent_ids, + box_srcntgt_starts, + box_srcntgt_counts_cumul, + source_numbers, + ) + ((box_srcntgt_counts_nonchild, ) + if srcntgts_have_extent else ()) - # output: - + ( - user_source_ids, srcntgt_target_ids, sorted_target_ids, - box_source_starts, box_source_counts_cumul, - box_target_starts, box_target_counts_cumul, - ) - + (( - box_source_counts_nonchild, - box_target_counts_nonchild, - ) if srcntgts_have_extent else ()) - ), - queue=queue, range=slice(nsrcntgts), + # output: + + ( + user_source_ids, + srcntgt_target_ids, + sorted_target_ids, + box_source_starts, + box_source_counts_cumul, + box_target_starts, + box_target_counts_cumul, + ) + (( + box_source_counts_nonchild, + box_target_counts_nonchild, + ) if srcntgts_have_extent else ())), + queue=queue, + range=slice(nsrcntgts), wait_for=wait_for) wait_for = [evt] if srcntgts_have_extent: if debug: - assert ( - box_srcntgt_counts_nonchild.get() - == - (box_source_counts_nonchild - + box_target_counts_nonchild).get()).all() + assert (box_srcntgt_counts_nonchild.get() == ( + box_source_counts_nonchild + + box_target_counts_nonchild).get()).all() if debug: usi_host = user_source_ids.get() @@ -1376,13 +1457,13 @@ class TreeBuilder(object): del usi_host sti_host = srcntgt_target_ids.get() - assert (sti_host < nsources+ntargets).all() + assert (sti_host < nsources + ntargets).all() assert (nsources <= sti_host).all() del sti_host - assert (box_source_counts_cumul.get() - + box_target_counts_cumul.get() - == box_srcntgt_counts_cumul.get()).all() + assert (box_source_counts_cumul.get() + + box_target_counts_cumul.get() == + box_srcntgt_counts_cumul.get()).all() del source_numbers @@ -1395,49 +1476,55 @@ class TreeBuilder(object): # {{{ permute and source/target-split (if necessary) particle array if targets is None: - sources = targets = make_obj_array([ - cl.array.empty_like(pt) for pt in srcntgts]) + sources = targets = make_obj_array( + [cl.array.empty_like(pt) for pt in srcntgts]) fin_debug("srcntgt permuter (particles)") evt = knl_info.srcntgt_permuter( - user_srcntgt_ids, - *(tuple(srcntgts) + tuple(sources)), - wait_for=wait_for) + user_srcntgt_ids, + *(tuple(srcntgts) + tuple(sources)), + wait_for=wait_for) wait_for = [evt] assert srcntgt_radii is None else: - sources = make_obj_array([ - empty(nsources, coord_dtype) for i in range(dimensions)]) + sources = make_obj_array( + [empty(nsources, coord_dtype) for i in range(dimensions)]) fin_debug("srcntgt permuter (sources)") evt = knl_info.srcntgt_permuter( - user_source_ids, - *(tuple(srcntgts) + tuple(sources)), - queue=queue, range=slice(nsources), - wait_for=wait_for) + user_source_ids, + *(tuple(srcntgts) + tuple(sources)), + queue=queue, + range=slice(nsources), + wait_for=wait_for) wait_for = [evt] - targets = make_obj_array([ - empty(ntargets, coord_dtype) for i in range(dimensions)]) + targets = make_obj_array( + [empty(ntargets, coord_dtype) for i in range(dimensions)]) fin_debug("srcntgt permuter (targets)") evt = knl_info.srcntgt_permuter( - srcntgt_target_ids, - *(tuple(srcntgts) + tuple(targets)), - queue=queue, range=slice(ntargets), - wait_for=wait_for) + srcntgt_target_ids, + *(tuple(srcntgts) + tuple(targets)), + queue=queue, + range=slice(ntargets), + wait_for=wait_for) wait_for = [evt] if srcntgt_radii is not None: fin_debug("srcntgt permuter (source radii)") source_radii = cl.array.take( - srcntgt_radii, user_source_ids, queue=queue, - wait_for=wait_for) + srcntgt_radii, + user_source_ids, + queue=queue, + wait_for=wait_for) fin_debug("srcntgt permuter (target radii)") target_radii = cl.array.take( - srcntgt_radii, srcntgt_target_ids, queue=queue, - wait_for=wait_for) + srcntgt_radii, + srcntgt_target_ids, + queue=queue, + wait_for=wait_for) wait_for = source_radii.events + target_radii.events @@ -1452,7 +1539,7 @@ class TreeBuilder(object): nlevels = len(level_start_box_nrs) - 1 assert nlevels == len(level_used_box_counts) - assert level + 1 == nlevels, (level+1, nlevels) + assert level + 1 == nlevels, (level + 1, nlevels) if debug: max_level = np.max(box_levels.get()) assert max_level + 1 == nlevels @@ -1462,9 +1549,10 @@ class TreeBuilder(object): # A number of arrays below are nominally 2-dimensional and stored with # the box index as the fastest-moving index. To make sure that accesses # remain aligned, we round up the number of boxes used for indexing. - aligned_nboxes = div_ceil(nboxes_post_prune, 32)*32 + aligned_nboxes = div_ceil(nboxes_post_prune, 32) * 32 - box_child_ids_new, evt = zeros((2**dimensions, aligned_nboxes), box_id_dtype) + box_child_ids_new, evt = zeros((2**dimensions, aligned_nboxes), + box_id_dtype) wait_for.append(evt) box_centers_new = empty((dimensions, aligned_nboxes), coord_dtype) @@ -1474,7 +1562,8 @@ class TreeBuilder(object): wait_for.extend(box_child_ids_new.events) for dim, center_row in enumerate(box_centers): - box_centers_new[dim, :nboxes_post_prune] = center_row[:nboxes_post_prune] + box_centers_new[dim, : + nboxes_post_prune] = center_row[:nboxes_post_prune] wait_for.extend(box_centers_new.events) cl.wait_for_events(wait_for) @@ -1518,33 +1607,38 @@ class TreeBuilder(object): # }}} - box_source_counts_nonchild, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_source_counts_nonchild, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) if sources_are_targets: box_target_counts_nonchild = box_source_counts_nonchild else: - box_target_counts_nonchild, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_target_counts_nonchild, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) fin_debug("compute box info") evt = knl_info.box_info_kernel( - *( - # input: - box_parent_ids, box_srcntgt_counts_cumul, - box_source_counts_cumul, box_target_counts_cumul, - box_has_children, box_levels, nlevels, - - # output if srcntgts_have_extent, input+output otherwise - box_source_counts_nonchild, box_target_counts_nonchild, + *( + # input: + box_parent_ids, + box_srcntgt_counts_cumul, + box_source_counts_cumul, + box_target_counts_cumul, + box_has_children, + box_levels, + nlevels, + + # output if srcntgts_have_extent, input+output otherwise + box_source_counts_nonchild, + box_target_counts_nonchild, - # output: - box_flags, - ), - range=slice(nboxes_post_prune), - wait_for=wait_for) + # output: + box_flags, + ), + range=slice(nboxes_post_prune), + wait_for=wait_for) # }}} @@ -1562,52 +1656,42 @@ class TreeBuilder(object): logger.info("tree build complete") return Tree( - # If you change this, also change the documentation - # of what's in the tree, above. - - sources_are_targets=sources_are_targets, - sources_have_extent=sources_have_extent, - targets_have_extent=targets_have_extent, - - particle_id_dtype=knl_info.particle_id_dtype, - box_id_dtype=knl_info.box_id_dtype, - coord_dtype=coord_dtype, - box_level_dtype=self.box_level_dtype, - - root_extent=root_extent, - stick_out_factor=stick_out_factor, - extent_norm=srcntgts_extent_norm, - - bounding_box=(bbox_min, bbox_max), - level_start_box_nrs=level_start_box_nrs, - level_start_box_nrs_dev=level_start_box_nrs_dev, - - sources=sources, - targets=targets, - - box_source_starts=box_source_starts, - box_source_counts_nonchild=box_source_counts_nonchild, - box_source_counts_cumul=box_source_counts_cumul, - box_target_starts=box_target_starts, - box_target_counts_nonchild=box_target_counts_nonchild, - box_target_counts_cumul=box_target_counts_cumul, - - box_parent_ids=box_parent_ids, - box_child_ids=box_child_ids, - box_centers=box_centers, - box_levels=box_levels, - box_flags=box_flags, - - user_source_ids=user_source_ids, - sorted_target_ids=sorted_target_ids, - - _is_pruned=prune_empty_leaves, - - **extra_tree_attrs - ).with_queue(None), evt + # If you change this, also change the documentation + # of what's in the tree, above. + sources_are_targets=sources_are_targets, + sources_have_extent=sources_have_extent, + targets_have_extent=targets_have_extent, + particle_id_dtype=knl_info.particle_id_dtype, + box_id_dtype=knl_info.box_id_dtype, + coord_dtype=coord_dtype, + box_level_dtype=self.box_level_dtype, + root_extent=root_extent, + stick_out_factor=stick_out_factor, + extent_norm=srcntgts_extent_norm, + bounding_box=(bbox_min, bbox_max), + level_start_box_nrs=level_start_box_nrs, + level_start_box_nrs_dev=level_start_box_nrs_dev, + sources=sources, + targets=targets, + box_source_starts=box_source_starts, + box_source_counts_nonchild=box_source_counts_nonchild, + box_source_counts_cumul=box_source_counts_cumul, + box_target_starts=box_target_starts, + box_target_counts_nonchild=box_target_counts_nonchild, + box_target_counts_cumul=box_target_counts_cumul, + box_parent_ids=box_parent_ids, + box_child_ids=box_child_ids, + box_centers=box_centers, + box_levels=box_levels, + box_flags=box_flags, + user_source_ids=user_source_ids, + sorted_target_ids=sorted_target_ids, + _is_pruned=prune_empty_leaves, + **extra_tree_attrs).with_queue(None), evt # }}} # }}} + # vim: foldmethod=marker:filetype=pyopencl -- GitLab From 8012d886324185dd06fefcf136979cb59a37e28f Mon Sep 17 00:00:00 2001 From: xywei Date: Wed, 7 Feb 2018 20:32:00 -0600 Subject: [PATCH 03/16] Run through yapf --- boxtree/tree_build.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index afa9c31..06beb85 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -461,8 +461,8 @@ class TreeBuilder(object): # Initialize box_centers[0] to contain the root box's center for d, (ax, evt) in enumerate(zip(axis_names, evts)): - center_ax = bbox["min_" - + ax] + (bbox["max_" + ax] - bbox["min_" + ax]) / 2 + center_ax = bbox["min_" + ax] + ( + bbox["max_" + ax] - bbox["min_" + ax]) / 2 box_centers[d][0].fill(center_ax, wait_for=[evt]) # box -> level map -- GitLab From b8030db3f2b7dcf9225ce257340e8b23db638ff4 Mon Sep 17 00:00:00 2001 From: xywei Date: Wed, 7 Feb 2018 20:33:00 -0600 Subject: [PATCH 04/16] Swap in yapf formatted tree builder from upstream --- boxtree/tree_build.py | 42 +++++++++++++----------------------------- 1 file changed, 13 insertions(+), 29 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 06beb85..bf2e949 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -92,7 +92,6 @@ class TreeBuilder(object): max_leaf_refine_weight=None, wait_for=None, extent_norm=None, - bbox=None, **kwargs): """ :arg queue: a :class:`pyopencl.CommandQueue` instance @@ -132,8 +131,6 @@ class TreeBuilder(object): execution. :arg extent_norm: ``"l2"`` or ``"linf"``. Indicates the norm with respect to which particle stick-out is measured. See :attr:`Tree.extent_norm`. - :arg bbox: When specified, the bounding box is used for tree - building. Otherwise, the bounding box is determined from particles. :arg kwargs: Used internally for debugging. :returns: a tuple ``(tree, event)``, where *tree* is an instance of @@ -356,35 +353,22 @@ class TreeBuilder(object): # {{{ find and process bounding box - if bbox is None: - bbox, _ = self.bbox_finder( - srcntgts, srcntgt_radii, wait_for=wait_for) - bbox = bbox.get() + bbox, _ = self.bbox_finder(srcntgts, srcntgt_radii, wait_for=wait_for) + bbox = bbox.get() - root_extent = max( - bbox["max_" + ax] - bbox["min_" + ax] for ax in axis_names) * ( - 1 + TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) + root_extent = max( + bbox["max_" + ax] - bbox["min_" + ax] + for ax in axis_names) * (1 + TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) - # make bbox square and slightly larger at the top, to ensure scaled - # coordinates are always < 1 - bbox_min = np.empty(dimensions, coord_dtype) - for i, ax in enumerate(axis_names): - bbox_min[i] = bbox["min_" + ax] + # make bbox square and slightly larger at the top, to ensure scaled + # coordinates are always < 1 + bbox_min = np.empty(dimensions, coord_dtype) + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_" + ax] - bbox_max = bbox_min + root_extent - for i, ax in enumerate(axis_names): - bbox["max_" + ax] = bbox_max[i] - else: - # all coordinates must be at the interior (boundary excluded) - # of the bbox - # Currently only cubic domain has ensured mesh reconstruction. - bbox_min = np.empty(dimensions, coord_dtype) - bbox_max = np.empty(dimensions, coord_dtype) - for i, ax in enumerate(axis_names): - bbox_min[i] = bbox["min_" + ax] - bbox_max[i] = bbox["max_" + ax] - assert (bbox_min[i] < bbox_max[i]) - root_extent = max(bbox_max - bbox_min) + bbox_max = bbox_min + root_extent + for i, ax in enumerate(axis_names): + bbox["max_" + ax] = bbox_max[i] # }}} -- GitLab From 04855371f5309b6aec67718004bcdfc9d0d60a79 Mon Sep 17 00:00:00 2001 From: xywei Date: Wed, 7 Feb 2018 20:34:47 -0600 Subject: [PATCH 05/16] Reapply changes --- boxtree/tree_build.py | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index bf2e949..06beb85 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -92,6 +92,7 @@ class TreeBuilder(object): max_leaf_refine_weight=None, wait_for=None, extent_norm=None, + bbox=None, **kwargs): """ :arg queue: a :class:`pyopencl.CommandQueue` instance @@ -131,6 +132,8 @@ class TreeBuilder(object): execution. :arg extent_norm: ``"l2"`` or ``"linf"``. Indicates the norm with respect to which particle stick-out is measured. See :attr:`Tree.extent_norm`. + :arg bbox: When specified, the bounding box is used for tree + building. Otherwise, the bounding box is determined from particles. :arg kwargs: Used internally for debugging. :returns: a tuple ``(tree, event)``, where *tree* is an instance of @@ -353,22 +356,35 @@ class TreeBuilder(object): # {{{ find and process bounding box - bbox, _ = self.bbox_finder(srcntgts, srcntgt_radii, wait_for=wait_for) - bbox = bbox.get() + if bbox is None: + bbox, _ = self.bbox_finder( + srcntgts, srcntgt_radii, wait_for=wait_for) + bbox = bbox.get() - root_extent = max( - bbox["max_" + ax] - bbox["min_" + ax] - for ax in axis_names) * (1 + TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) + root_extent = max( + bbox["max_" + ax] - bbox["min_" + ax] for ax in axis_names) * ( + 1 + TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) - # make bbox square and slightly larger at the top, to ensure scaled - # coordinates are always < 1 - bbox_min = np.empty(dimensions, coord_dtype) - for i, ax in enumerate(axis_names): - bbox_min[i] = bbox["min_" + ax] + # make bbox square and slightly larger at the top, to ensure scaled + # coordinates are always < 1 + bbox_min = np.empty(dimensions, coord_dtype) + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_" + ax] - bbox_max = bbox_min + root_extent - for i, ax in enumerate(axis_names): - bbox["max_" + ax] = bbox_max[i] + bbox_max = bbox_min + root_extent + for i, ax in enumerate(axis_names): + bbox["max_" + ax] = bbox_max[i] + else: + # all coordinates must be at the interior (boundary excluded) + # of the bbox + # Currently only cubic domain has ensured mesh reconstruction. + bbox_min = np.empty(dimensions, coord_dtype) + bbox_max = np.empty(dimensions, coord_dtype) + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_" + ax] + bbox_max[i] = bbox["max_" + ax] + assert (bbox_min[i] < bbox_max[i]) + root_extent = max(bbox_max - bbox_min) # }}} -- GitLab From 114025162d2dd88e11d38f2ab91d490ce44add71 Mon Sep 17 00:00:00 2001 From: xywei Date: Thu, 8 Feb 2018 09:24:26 -0600 Subject: [PATCH 06/16] Restart from current master --- boxtree/tree_build.py | 826 +++++++++++++++++++----------------------- 1 file changed, 371 insertions(+), 455 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 06beb85..2566dc4 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -22,6 +22,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ + from six.moves import range, zip import numpy as np @@ -58,42 +59,27 @@ class TreeBuilder(object): ROOT_EXTENT_STRETCH_FACTOR = 1e-4 @memoize_method - def get_kernel_info(self, dimensions, coord_dtype, particle_id_dtype, - box_id_dtype, sources_are_targets, srcntgts_extent_norm, - kind): + def get_kernel_info(self, dimensions, coord_dtype, + particle_id_dtype, box_id_dtype, + sources_are_targets, srcntgts_extent_norm, + kind): from boxtree.tree_build_kernels import get_tree_build_kernel_info - return get_tree_build_kernel_info( - self.context, - dimensions, - coord_dtype, - particle_id_dtype, - box_id_dtype, - sources_are_targets, - srcntgts_extent_norm, - self.morton_nr_dtype, - self.box_level_dtype, + return get_tree_build_kernel_info(self.context, dimensions, coord_dtype, + particle_id_dtype, box_id_dtype, + sources_are_targets, srcntgts_extent_norm, + self.morton_nr_dtype, self.box_level_dtype, kind=kind) # {{{ run control - def __call__(self, - queue, - particles, - kind="adaptive", - max_particles_in_box=None, - allocator=None, - debug=False, - targets=None, - source_radii=None, - target_radii=None, - stick_out_factor=None, - refine_weights=None, - max_leaf_refine_weight=None, - wait_for=None, - extent_norm=None, - bbox=None, - **kwargs): + def __call__(self, queue, particles, kind="adaptive", + max_particles_in_box=None, allocator=None, debug=False, + targets=None, source_radii=None, target_radii=None, + stick_out_factor=None, refine_weights=None, + max_leaf_refine_weight=None, wait_for=None, + extent_norm=None, + **kwargs): """ :arg queue: a :class:`pyopencl.CommandQueue` instance :arg particles: an object array of (XYZ) point coordinate arrays. @@ -132,8 +118,6 @@ class TreeBuilder(object): execution. :arg extent_norm: ``"l2"`` or ``"linf"``. Indicates the norm with respect to which particle stick-out is measured. See :attr:`Tree.extent_norm`. - :arg bbox: When specified, the bounding box is used for tree - building. Otherwise, the bounding box is determined from particles. :arg kwargs: Used internally for debugging. :returns: a tuple ``(tree, event)``, where *tree* is an instance of @@ -143,9 +127,7 @@ class TreeBuilder(object): # {{{ input processing - if kind not in [ - "adaptive", "adaptive-level-restricted", "non-adaptive" - ]: + if kind not in ["adaptive", "adaptive-level-restricted", "non-adaptive"]: raise ValueError("unknown tree kind \"{0}\"".format(kind)) # we'll modify this below, so copy it @@ -167,8 +149,8 @@ class TreeBuilder(object): extent_norm = "linf" if extent_norm not in ["linf", "l2"]: - raise ValueError( - "unexpected value of 'extent_norm': %s" % extent_norm) + raise ValueError("unexpected value of 'extent_norm': %s" + % extent_norm) srcntgts_extent_norm = extent_norm srcntgts_have_extent = sources_have_extent or targets_have_extent @@ -179,7 +161,7 @@ class TreeBuilder(object): if srcntgts_extent_norm and targets is None: raise ValueError("must specify targets when specifying " - "any kind of radii") + "any kind of radii") from pytools import single_valued particle_id_dtype = np.int32 @@ -194,26 +176,25 @@ class TreeBuilder(object): nsrcntgts = nsources + ntargets if source_radii is not None: - if source_radii.shape != (nsources, ): + if source_radii.shape != (nsources,): raise ValueError("source_radii has an invalid shape") if source_radii.dtype != coord_dtype: raise TypeError("dtypes of coordinate arrays and " - "source_radii must agree") + "source_radii must agree") if target_radii is not None: - if target_radii.shape != (ntargets, ): + if target_radii.shape != (ntargets,): raise ValueError("target_radii has an invalid shape") if target_radii.dtype != coord_dtype: raise TypeError("dtypes of coordinate arrays and " - "target_radii must agree") + "target_radii must agree") if sources_have_extent or targets_have_extent: if stick_out_factor is None: - raise ValueError( - "if sources or targets have extent, " - "stick_out_factor must be explicitly specified") + raise ValueError("if sources or targets have extent, " + "stick_out_factor must be explicitly specified") else: stick_out_factor = 0 @@ -226,14 +207,10 @@ class TreeBuilder(object): event, = result.events return result, event - knl_info = self.get_kernel_info( - dimensions, - coord_dtype, - particle_id_dtype, - box_id_dtype, - sources_are_targets, - srcntgts_extent_norm, - kind=kind) + knl_info = self.get_kernel_info(dimensions, coord_dtype, + particle_id_dtype, box_id_dtype, + sources_are_targets, srcntgts_extent_norm, + kind=kind) logger.info("tree build: start") @@ -263,7 +240,7 @@ class TreeBuilder(object): if target_coord_dtype != coord_dtype: raise TypeError("sources and targets must have same coordinate " - "dtype") + "dtype") def combine_srcntgt_arrays(ary1, ary2=None): if ary2 is None: @@ -287,11 +264,10 @@ class TreeBuilder(object): srcntgts = make_obj_array([ combine_srcntgt_arrays(src_i, tgt_i) for src_i, tgt_i in zip(particles, targets) - ]) + ]) if srcntgts_have_extent: - srcntgt_radii = combine_srcntgt_arrays(source_radii, - target_radii) + srcntgt_radii = combine_srcntgt_arrays(source_radii, target_radii) else: srcntgt_radii = None @@ -300,8 +276,8 @@ class TreeBuilder(object): del particles - user_srcntgt_ids = cl.array.arange( - queue, nsrcntgts, dtype=particle_id_dtype, allocator=allocator) + user_srcntgt_ids = cl.array.arange(queue, nsrcntgts, dtype=particle_id_dtype, + allocator=allocator) evt, = user_srcntgt_ids.events wait_for.append(evt) @@ -319,34 +295,33 @@ class TreeBuilder(object): if specified_max_particles_in_box and specified_refine_weights: raise ValueError("may only specify one of max_particles_in_box and " - "refine_weights/max_leaf_refine_weight") + "refine_weights/max_leaf_refine_weight") elif not specified_max_particles_in_box and not specified_refine_weights: raise ValueError("must specify either max_particles_in_box or " - "refine_weights/max_leaf_refine_weight") + "refine_weights/max_leaf_refine_weight") elif specified_max_particles_in_box: - refine_weights = (cl.array.empty( - queue, nsrcntgts, refine_weight_dtype, allocator=allocator) - .fill(1)) + refine_weights = ( + cl.array.empty( + queue, nsrcntgts, refine_weight_dtype, allocator=allocator) + .fill(1)) event, = refine_weights.events prep_events.append(event) max_leaf_refine_weight = max_particles_in_box elif specified_refine_weights: if refine_weights.dtype != refine_weight_dtype: - raise TypeError( - "refine_weights must have dtype '%s'" % refine_weight_dtype) + raise TypeError("refine_weights must have dtype '%s'" + % refine_weight_dtype) if max_leaf_refine_weight < cl.array.max(refine_weights).get(): raise ValueError( - "entries of refine_weights cannot exceed max_leaf_refine_weight" - ) + "entries of refine_weights cannot exceed max_leaf_refine_weight") if 0 > cl.array.min(refine_weights).get(): - raise ValueError( - "all entries of refine_weights must be nonnegative") + raise ValueError("all entries of refine_weights must be nonnegative") if max_leaf_refine_weight <= 0: raise ValueError("max_leaf_refine_weight must be positive") total_refine_weight = cl.array.sum( - refine_weights, dtype=np.dtype(np.int64)).get() + refine_weights, dtype=np.dtype(np.int64)).get() del max_particles_in_box del specified_max_particles_in_box @@ -356,35 +331,22 @@ class TreeBuilder(object): # {{{ find and process bounding box - if bbox is None: - bbox, _ = self.bbox_finder( - srcntgts, srcntgt_radii, wait_for=wait_for) - bbox = bbox.get() + bbox, _ = self.bbox_finder(srcntgts, srcntgt_radii, wait_for=wait_for) + bbox = bbox.get() - root_extent = max( - bbox["max_" + ax] - bbox["min_" + ax] for ax in axis_names) * ( - 1 + TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) + root_extent = max( + bbox["max_"+ax] - bbox["min_"+ax] + for ax in axis_names) * (1+TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) - # make bbox square and slightly larger at the top, to ensure scaled - # coordinates are always < 1 - bbox_min = np.empty(dimensions, coord_dtype) - for i, ax in enumerate(axis_names): - bbox_min[i] = bbox["min_" + ax] + # make bbox square and slightly larger at the top, to ensure scaled + # coordinates are always < 1 + bbox_min = np.empty(dimensions, coord_dtype) + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_"+ax] - bbox_max = bbox_min + root_extent - for i, ax in enumerate(axis_names): - bbox["max_" + ax] = bbox_max[i] - else: - # all coordinates must be at the interior (boundary excluded) - # of the bbox - # Currently only cubic domain has ensured mesh reconstruction. - bbox_min = np.empty(dimensions, coord_dtype) - bbox_max = np.empty(dimensions, coord_dtype) - for i, ax in enumerate(axis_names): - bbox_min[i] = bbox["min_" + ax] - bbox_max[i] = bbox["max_" + ax] - assert (bbox_min[i] < bbox_max[i]) - root_extent = max(bbox_max - bbox_min) + bbox_max = bbox_min + root_extent + for i, ax in enumerate(axis_names): + bbox["max_"+ax] = bbox_max[i] # }}} @@ -394,8 +356,7 @@ class TreeBuilder(object): # box-local morton bin counts for each particle at the current level # only valid from scan -> split'n'sort - morton_bin_counts = empty( - nsrcntgts, dtype=knl_info.morton_bin_count_dtype) + morton_bin_counts = empty(nsrcntgts, dtype=knl_info.morton_bin_count_dtype) # (local) morton nrs for each particle at the current level # only valid from scan -> split'n'sort @@ -415,8 +376,8 @@ class TreeBuilder(object): nboxes_guess = kwargs.get("nboxes_guess") if nboxes_guess is None: nboxes_guess = 2**dimensions * ( - (max_leaf_refine_weight + total_refine_weight - 1 - ) // max_leaf_refine_weight) + (max_leaf_refine_weight + total_refine_weight - 1) + // max_leaf_refine_weight) assert nboxes_guess > 0 @@ -437,8 +398,8 @@ class TreeBuilder(object): prep_events.append(evt) # per-box morton bin counts - box_morton_bin_counts, evt = zeros( - nboxes_guess, dtype=knl_info.morton_bin_count_dtype) + box_morton_bin_counts, evt = zeros(nboxes_guess, + dtype=knl_info.morton_bin_count_dtype) prep_events.append(evt) # particle# at which each box starts @@ -450,19 +411,18 @@ class TreeBuilder(object): prep_events.append(evt) # pointer to child box, by morton number - box_child_ids, evts = zip(*(zeros(nboxes_guess, dtype=box_id_dtype) - for d in range(2**dimensions))) + box_child_ids, evts = zip( + *(zeros(nboxes_guess, dtype=box_id_dtype) for d in range(2**dimensions))) prep_events.extend(evts) # box centers, by dimension - box_centers, evts = zip(*(zeros(nboxes_guess, dtype=coord_dtype) - for d in range(dimensions))) + box_centers, evts = zip( + *(zeros(nboxes_guess, dtype=coord_dtype) for d in range(dimensions))) prep_events.extend(evts) # Initialize box_centers[0] to contain the root box's center for d, (ax, evt) in enumerate(zip(axis_names, evts)): - center_ax = bbox["min_" + ax] + ( - bbox["max_" + ax] - bbox["min_" + ax]) / 2 + center_ax = bbox["min_"+ax] + (bbox["max_"+ax] - bbox["min_"+ax]) / 2 box_centers[d][0].fill(center_ax, wait_for=[evt]) # box -> level map @@ -471,12 +431,12 @@ class TreeBuilder(object): # number of particles in each box # needs to be globally initialized because empty boxes never get touched - box_srcntgt_counts_cumul, evt = zeros( - nboxes_guess, dtype=particle_id_dtype) + box_srcntgt_counts_cumul, evt = zeros(nboxes_guess, dtype=particle_id_dtype) prep_events.append(evt) # Initalize box 0 to contain all particles - box_srcntgt_counts_cumul[0].fill(nsrcntgts, queue=queue, wait_for=[evt]) + box_srcntgt_counts_cumul[0].fill( + nsrcntgts, queue=queue, wait_for=[evt]) # box -> whether the box has a child. FIXME: use smaller integer type box_has_children, evt = zeros(nboxes_guess, dtype=np.dtype(np.int32)) @@ -484,14 +444,14 @@ class TreeBuilder(object): # box -> whether the box needs a splitting to enforce level restriction. # FIXME: use smaller integer type - force_split_box, evt = zeros( - nboxes_guess if knl_info.level_restrict else 0, - dtype=np.dtype(np.int32)) + force_split_box, evt = zeros(nboxes_guess + if knl_info.level_restrict + else 0, dtype=np.dtype(np.int32)) prep_events.append(evt) # set parent of root box to itself - evt = cl.enqueue_copy(queue, box_parent_ids.data, - np.zeros((), dtype=box_parent_ids.dtype)) + evt = cl.enqueue_copy( + queue, box_parent_ids.data, np.zeros((), dtype=box_parent_ids.dtype)) prep_events.append(evt) nlevels_max = np.iinfo(self.box_level_dtype).max @@ -583,50 +543,55 @@ class TreeBuilder(object): if level > np.iinfo(self.box_level_dtype).max: raise RuntimeError("level count exceeded maximum") - common_args = ( - (morton_bin_counts, morton_nrs, box_start_flags, - srcntgt_box_ids, split_box_ids, box_morton_bin_counts, - refine_weights, max_leaf_refine_weight, box_srcntgt_starts, - box_srcntgt_counts_cumul, box_parent_ids, box_levels, level, - bbox, user_srcntgt_ids) + tuple(srcntgts) + - ((srcntgt_radii, ) if srcntgts_have_extent else ())) + common_args = ((morton_bin_counts, morton_nrs, + box_start_flags, + srcntgt_box_ids, split_box_ids, + box_morton_bin_counts, + refine_weights, + max_leaf_refine_weight, + box_srcntgt_starts, box_srcntgt_counts_cumul, + box_parent_ids, box_levels, + level, bbox, + user_srcntgt_ids) + + tuple(srcntgts) + + ((srcntgt_radii,) if srcntgts_have_extent else ()) + ) fin_debug("morton count scan") morton_count_args = common_args if srcntgts_have_extent: - morton_count_args += (stick_out_factor, ) + morton_count_args += (stick_out_factor,) # writes: box_morton_bin_counts evt = knl_info.morton_count_scan( - *morton_count_args, - queue=queue, - size=nsrcntgts, - wait_for=wait_for) + *morton_count_args, queue=queue, size=nsrcntgts, + wait_for=wait_for) wait_for = [evt] fin_debug("split box id scan") # writes: box_has_children, split_box_ids evt = knl_info.split_box_id_scan( - srcntgt_box_ids, - box_srcntgt_counts_cumul, - box_morton_bin_counts, - refine_weights, - max_leaf_refine_weight, - box_levels, - level_start_box_nrs_dev, - level_used_box_counts_dev, - force_split_box, - level, + srcntgt_box_ids, + box_srcntgt_counts_cumul, + box_morton_bin_counts, + refine_weights, + max_leaf_refine_weight, + box_levels, + level_start_box_nrs_dev, + level_used_box_counts_dev, + force_split_box, + level, - # output: - box_has_children, - split_box_ids, - have_oversize_split_box, - queue=queue, - size=level_start_box_nrs[level], - wait_for=wait_for) + # output: + box_has_children, + split_box_ids, + have_oversize_split_box, + + queue=queue, + size=level_start_box_nrs[level], + wait_for=wait_for) wait_for = [evt] # {{{ compute new level_used_box_counts, level_leaf_counts @@ -638,20 +603,21 @@ class TreeBuilder(object): last_box_on_prev_level = level_start_box_id - 1 new_level_used_box_counts.append( # FIXME: Get this all at once. - int(split_box_ids[last_box_on_prev_level].get()) - - level_start_box_id) + int(split_box_ids[last_box_on_prev_level].get()) + - level_start_box_id) # New leaf count = # old leaf count # + nr. new boxes from splitting parent's leaves # - nr. new boxes from splitting current level's leaves / 2**d - level_used_box_counts_diff = (new_level_used_box_counts - - np.append(level_used_box_counts, [0])) - new_level_leaf_counts = ( - level_leaf_counts + level_used_box_counts_diff[:-1] - - level_used_box_counts_diff[1:] // 2**dimensions) - new_level_leaf_counts = np.append(new_level_leaf_counts, - [level_used_box_counts_diff[-1]]) + level_used_box_counts_diff = (new_level_used_box_counts + - np.append(level_used_box_counts, [0])) + new_level_leaf_counts = (level_leaf_counts + + level_used_box_counts_diff[:-1] + - level_used_box_counts_diff[1:] // 2 ** dimensions) + new_level_leaf_counts = np.append( + new_level_leaf_counts, + [level_used_box_counts_diff[-1]]) del level_used_box_counts_diff # }}} @@ -672,8 +638,7 @@ class TreeBuilder(object): curr_upper_level_lengths = np.diff(level_start_box_nrs) minimal_upper_level_lengths = np.max( - [new_level_used_box_counts[:-1], curr_upper_level_lengths], - axis=0) + [new_level_used_box_counts[:-1], curr_upper_level_lengths], axis=0) minimal_new_level_length = new_level_used_box_counts[-1] # Allocate extra space at the end of the current level for higher @@ -687,7 +652,7 @@ class TreeBuilder(object): # Currently undocumented. lr_lookbehind_levels = kwargs.get("lr_lookbehind", 1) minimal_new_level_length += sum( - 2**(l * dimensions) * new_level_leaf_counts[level - l] + 2**(l*dimensions) * new_level_leaf_counts[level - l] for l in range(1, 1 + min(level, lr_lookbehind_levels))) nboxes_minimal = \ @@ -709,25 +674,22 @@ class TreeBuilder(object): # Recompute the level padding. for ulevel in range(level): upper_level_padding[ulevel] = sum( - 2**(l * dimensions) * new_level_leaf_counts[ulevel - l] - for l in range(1, - 1 + min(ulevel, lr_lookbehind_levels))) + 2**(l*dimensions) * new_level_leaf_counts[ulevel - l] + for l in range( + 1, 1 + min(ulevel, lr_lookbehind_levels))) new_upper_level_unused_box_counts = np.max( - [ - upper_level_padding, minimal_upper_level_lengths - - new_level_used_box_counts[:-1] - ], + [upper_level_padding, + minimal_upper_level_lengths - new_level_used_box_counts[:-1]], axis=0) new_level_start_box_nrs = np.empty(level + 1, dtype=int) new_level_start_box_nrs[0] = 0 - new_level_start_box_nrs[ - 1:] = np.cumsum(new_level_used_box_counts[:-1] + - new_upper_level_unused_box_counts) + new_level_start_box_nrs[1:] = np.cumsum( + new_level_used_box_counts[:-1] + + new_upper_level_unused_box_counts) - assert not ( - level_start_box_nrs == new_level_start_box_nrs).all() + assert not (level_start_box_nrs == new_level_start_box_nrs).all() # }}} @@ -735,8 +697,8 @@ class TreeBuilder(object): old_box_count = level_start_box_nrs[-1] # Where should I put this box? - dst_box_id = cl.array.empty( - queue, shape=old_box_count, dtype=box_id_dtype) + dst_box_id = cl.array.empty(queue, + shape=old_box_count, dtype=box_id_dtype) for level_start, new_level_start, level_len in zip( level_start_box_nrs, new_level_start_box_nrs, @@ -749,17 +711,12 @@ class TreeBuilder(object): wait_for.extend(dst_box_id.events) - realloc_array = partial( - self.gappy_copy_and_map, - dst_indices=dst_box_id, - range=slice(old_box_count), - debug=debug) - realloc_and_renumber_array = partial( - self.gappy_copy_and_map, - dst_indices=dst_box_id, - map_values=dst_box_id, - range=slice(old_box_count), - debug=debug) + realloc_array = partial(self.gappy_copy_and_map, + dst_indices=dst_box_id, range=slice(old_box_count), + debug=debug) + realloc_and_renumber_array = partial(self.gappy_copy_and_map, + dst_indices=dst_box_id, map_values=dst_box_id, + range=slice(old_box_count), debug=debug) renumber_array = partial(self.map_values_kernel, dst_box_id) # }}} @@ -796,40 +753,22 @@ class TreeBuilder(object): nboxes_guess *= 2 def my_realloc_nocopy(ary): - return cl.array.empty( - queue, - allocator=allocator, - shape=nboxes_guess, - dtype=ary.dtype) + return cl.array.empty(queue, allocator=allocator, + shape=nboxes_guess, dtype=ary.dtype) def my_realloc_zeros_nocopy(ary): - result = cl.array.zeros( - queue, - allocator=allocator, - shape=nboxes_guess, - dtype=ary.dtype) + result = cl.array.zeros(queue, allocator=allocator, + shape=nboxes_guess, dtype=ary.dtype) return result, result.events[0] - my_realloc = partial( - realloc_array, - queue, - allocator, - nboxes_guess, - wait_for=wait_for) - my_realloc_zeros = partial( - realloc_array, - queue, - allocator, - nboxes_guess, - zero_fill=True, - wait_for=wait_for) - my_realloc_zeros_and_renumber = partial( - realloc_and_renumber_array, - queue, - allocator, - nboxes_guess, - zero_fill=True, - wait_for=wait_for) + my_realloc = partial(realloc_array, + queue, allocator, nboxes_guess, wait_for=wait_for) + my_realloc_zeros = partial(realloc_array, + queue, allocator, nboxes_guess, zero_fill=True, + wait_for=wait_for) + my_realloc_zeros_and_renumber = partial(realloc_and_renumber_array, + queue, allocator, nboxes_guess, zero_fill=True, + wait_for=wait_for) resize_events = [] @@ -841,7 +780,7 @@ class TreeBuilder(object): # currently being processed are written-but we need to # retain the box morton bin counts from the higher levels. box_morton_bin_counts, evt = my_realloc_zeros( - box_morton_bin_counts) + box_morton_bin_counts) resize_events.append(evt) # force_split_box is unused unless level restriction is enabled. @@ -859,16 +798,16 @@ class TreeBuilder(object): box_has_children, evt = my_realloc_zeros(box_has_children) resize_events.append(evt) - box_centers, evts = zip(*(my_realloc(ary) - for ary in box_centers)) + box_centers, evts = zip( + *(my_realloc(ary) for ary in box_centers)) resize_events.extend(evts) - box_child_ids, evts = zip(*(my_realloc_zeros_and_renumber(ary) - for ary in box_child_ids)) + box_child_ids, evts = zip( + *(my_realloc_zeros_and_renumber(ary) + for ary in box_child_ids)) resize_events.extend(evts) - box_parent_ids, evt = my_realloc_zeros_and_renumber( - box_parent_ids) + box_parent_ids, evt = my_realloc_zeros_and_renumber(box_parent_ids) resize_events.append(evt) if not level_start_box_nrs_updated: @@ -877,8 +816,8 @@ class TreeBuilder(object): else: box_levels, evt = my_realloc_zeros_nocopy(box_levels) cl.wait_for_events([evt]) - for box_level, (level_start, level_end) in enumerate( - zip(level_start_box_nrs, level_start_box_nrs[1:])): + for box_level, (level_start, level_end) in enumerate(zip( + level_start_box_nrs, level_start_box_nrs[1:])): box_levels[level_start:level_end].fill(box_level) resize_events.extend(box_levels.events) @@ -906,8 +845,10 @@ class TreeBuilder(object): logger.info("LEVEL %d -> %d boxes" % (level, nboxes_new)) - assert (level_start_box_nrs[-1] != nboxes_new - or srcntgts_have_extent or final_level_restrict_iteration) + assert ( + level_start_box_nrs[-1] != nboxes_new or + srcntgts_have_extent or + final_level_restrict_iteration) if level_start_box_nrs[-1] == nboxes_new: # We haven't created new boxes in this level loop trip. @@ -934,14 +875,15 @@ class TreeBuilder(object): level_leaf_counts = new_level_leaf_counts if debug: for level_start, level_nboxes, leaf_count in zip( - level_start_box_nrs, level_used_box_counts, + level_start_box_nrs, + level_used_box_counts, level_leaf_counts): if level_nboxes == 0: assert leaf_count == 0 continue nleaves_actual = level_nboxes - int( - cl.array.sum(box_has_children[level_start:level_start + - level_nboxes]).get()) + cl.array.sum(box_has_children[ + level_start:level_start + level_nboxes]).get()) assert leaf_count == nleaves_actual # Can't del in Py2.7 - see note below @@ -954,14 +896,15 @@ class TreeBuilder(object): # {{{ split boxes - box_splitter_args = (common_args + - (box_has_children, force_split_box, - root_extent) + box_child_ids + box_centers) + box_splitter_args = ( + common_args + + (box_has_children, force_split_box, root_extent) + + box_child_ids + + box_centers) - evt = knl_info.box_splitter_kernel( - *box_splitter_args, - range=slice(level_start_box_nrs[-1]), - wait_for=wait_for) + evt = knl_info.box_splitter_kernel(*box_splitter_args, + range=slice(level_start_box_nrs[-1]), + wait_for=wait_for) wait_for = [evt] @@ -976,8 +919,8 @@ class TreeBuilder(object): if debug: box_levels.finish() - level_bl_chunk = box_levels.get()[level_start_box_nrs[-2]: - level_start_box_nrs[-1]] + level_bl_chunk = box_levels.get()[ + level_start_box_nrs[-2]:level_start_box_nrs[-1]] assert (level_bl_chunk == level).all() del level_bl_chunk @@ -992,13 +935,12 @@ class TreeBuilder(object): new_srcntgt_box_ids = cl.array.empty_like(srcntgt_box_ids) particle_renumberer_args = ( - common_args + (box_has_children, force_split_box, - new_user_srcntgt_ids, new_srcntgt_box_ids)) + common_args + + (box_has_children, force_split_box, + new_user_srcntgt_ids, new_srcntgt_box_ids)) - evt = knl_info.particle_renumberer_kernel( - *particle_renumberer_args, - range=slice(nsrcntgts), - wait_for=wait_for) + evt = knl_info.particle_renumberer_kernel(*particle_renumberer_args, + range=slice(nsrcntgts), wait_for=wait_for) wait_for = [evt] @@ -1036,7 +978,7 @@ class TreeBuilder(object): LEVEL_STEP = 10 # noqa if level % LEVEL_STEP == 1: level_restrict_kernel = knl_info.level_restrict_kernel_builder( - LEVEL_STEP * div_ceil(level, LEVEL_STEP)) + LEVEL_STEP * div_ceil(level, LEVEL_STEP)) # Upward pass - check if leaf boxes at higher levels need # further splitting. @@ -1061,8 +1003,7 @@ class TreeBuilder(object): level_used_box_counts[-3::-1]): upper_level_slice = slice( - upper_level_start, - upper_level_start + upper_level_box_count) + upper_level_start, upper_level_start + upper_level_box_count) have_upper_level_split_box.fill(0) wait_for.extend(have_upper_level_split_box.events) @@ -1082,10 +1023,8 @@ class TreeBuilder(object): if debug: force_split_box.finish() - boxes_split.append( - int( - cl.array.sum( - force_split_box[upper_level_slice]).get())) + boxes_split.append(int(cl.array.sum( + force_split_box[upper_level_slice]).get())) if int(have_upper_level_split_box.get()) == 0: break @@ -1094,20 +1033,16 @@ class TreeBuilder(object): if debug: total_boxes_split = sum(boxes_split) - logger.debug( - "level restriction: {total_boxes_split} boxes split" - .format(total_boxes_split=total_boxes_split)) + logger.debug("level restriction: {total_boxes_split} boxes split" + .format(total_boxes_split=total_boxes_split)) from itertools import count for level_, nboxes_split in zip( count(level - 2, step=-1), boxes_split[:-1]): logger.debug("level {level}: {nboxes_split} boxes split" - .format( - level=level_, - nboxes_split=nboxes_split)) + .format(level=level_, nboxes_split=nboxes_split)) del boxes_split - if int(have_oversize_split_box.get() - ) == 0 and did_upper_level_split: + if int(have_oversize_split_box.get()) == 0 and did_upper_level_split: # We are in the situation where there are boxes left to # split on upper levels, and the level loop is done creating # lower levels. @@ -1145,12 +1080,16 @@ class TreeBuilder(object): # empty boxes don't have box_morton_bin_counts written continue - kid_sum = sum(h_box_srcntgt_counts_cumul[bci[ibox]] - for bci in h_box_child_ids if bci[ibox] != 0) + kid_sum = sum( + h_box_srcntgt_counts_cumul[bci[ibox]] + for bci in h_box_child_ids + if bci[ibox] != 0) - if (h_box_srcntgt_counts_cumul[ibox] != - h_box_morton_bin_counts[ibox]["nonchild_srcntgts"] + - kid_sum): + if ( + h_box_srcntgt_counts_cumul[ibox] + != + h_box_morton_bin_counts[ibox]["nonchild_srcntgts"] + + kid_sum): print("MISMATCH", level, ibox) has_mismatch = True @@ -1166,10 +1105,10 @@ class TreeBuilder(object): # }}} end_time = time() - elapsed = end_time - start_time - npasses = level + 1 - logger.info("elapsed time: %g s (%g s/particle/pass)" % - (elapsed, elapsed / (npasses * nsrcntgts))) + elapsed = end_time-start_time + npasses = level+1 + logger.info("elapsed time: %g s (%g s/particle/pass)" % ( + elapsed, elapsed/(npasses*nsrcntgts))) del npasses nboxes = level_start_box_nrs[-1] @@ -1186,26 +1125,25 @@ class TreeBuilder(object): highest_possibly_split_box_nr = level_start_box_nrs[-2] evt = knl_info.extract_nonchild_srcntgt_count_kernel( - # input - box_morton_bin_counts, - box_srcntgt_counts_cumul, - highest_possibly_split_box_nr, - - # output - box_srcntgt_counts_nonchild, - range=slice(nboxes), - wait_for=wait_for) + # input + box_morton_bin_counts, + box_srcntgt_counts_cumul, + highest_possibly_split_box_nr, + + # output + box_srcntgt_counts_nonchild, + + range=slice(nboxes), wait_for=wait_for) wait_for = [evt] del highest_possibly_split_box_nr if debug: - h_box_srcntgt_counts_nonchild = box_srcntgt_counts_nonchild.get( - ) + h_box_srcntgt_counts_nonchild = box_srcntgt_counts_nonchild.get() h_box_srcntgt_counts_cumul = box_srcntgt_counts_cumul.get() - assert (h_box_srcntgt_counts_nonchild <= - h_box_srcntgt_counts_cumul[:nboxes]).all() + assert (h_box_srcntgt_counts_nonchild + <= h_box_srcntgt_counts_cumul[:nboxes]).all() del h_box_srcntgt_counts_nonchild @@ -1236,17 +1174,14 @@ class TreeBuilder(object): nboxes_post_prune_dev = empty((), dtype=box_id_dtype) evt = knl_info.find_prune_indices_kernel( - box_srcntgt_counts_cumul, - src_box_id, - dst_box_id, - nboxes_post_prune_dev, - size=nboxes, - wait_for=wait_for) + box_srcntgt_counts_cumul, + src_box_id, dst_box_id, nboxes_post_prune_dev, + size=nboxes, wait_for=wait_for) wait_for = [evt] nboxes_post_prune = int(nboxes_post_prune_dev.get()) logger.info("{} boxes after pruning " - "({} empty leaves and/or unused boxes removed)".format( - nboxes_post_prune, nboxes - nboxes_post_prune)) + "({} empty leaves and/or unused boxes removed)" + .format(nboxes_post_prune, nboxes - nboxes_post_prune)) should_prune = True elif knl_info.level_restrict: # Remove unused boxes from the tree. @@ -1259,27 +1194,21 @@ class TreeBuilder(object): for level_start, new_level_start, level_used_box_count in zip( level_start_box_nrs, new_level_start_box_nrs, level_used_box_counts): - def make_slice(start): return slice(start, start + level_used_box_count) def make_arange(start): - return cl.array.arange( - queue, - start, - start + level_used_box_count, - dtype=box_id_dtype) - - src_box_id[make_slice(new_level_start)] = make_arange( - level_start) - dst_box_id[make_slice(level_start)] = make_arange( - new_level_start) + return cl.array.arange(queue, start, + start + level_used_box_count, dtype=box_id_dtype) + + src_box_id[make_slice(new_level_start)] = make_arange(level_start) + dst_box_id[make_slice(level_start)] = make_arange(new_level_start) wait_for.extend(src_box_id.events + dst_box_id.events) nboxes_post_prune = new_level_start_box_nrs[-1] logger.info("{} boxes after pruning ({} unused boxes removed)" - .format(nboxes_post_prune, nboxes - nboxes_post_prune)) + .format(nboxes_post_prune, nboxes - nboxes_post_prune)) should_prune = True else: should_prune = False @@ -1287,31 +1216,25 @@ class TreeBuilder(object): if should_prune: prune_events = [] - prune_empty = partial( - self.gappy_copy_and_map, - queue, - allocator, - nboxes_post_prune, - src_indices=src_box_id, - range=slice(nboxes_post_prune), - debug=debug) + prune_empty = partial(self.gappy_copy_and_map, + queue, allocator, nboxes_post_prune, + src_indices=src_box_id, + range=slice(nboxes_post_prune), debug=debug) box_srcntgt_starts, evt = prune_empty(box_srcntgt_starts) prune_events.append(evt) - box_srcntgt_counts_cumul, evt = prune_empty( - box_srcntgt_counts_cumul) + box_srcntgt_counts_cumul, evt = prune_empty(box_srcntgt_counts_cumul) prune_events.append(evt) if debug and prune_empty_leaves: assert (box_srcntgt_counts_cumul.get() > 0).all() srcntgt_box_ids, evt = self.map_values_kernel( - dst_box_id, srcntgt_box_ids) + dst_box_id, srcntgt_box_ids) prune_events.append(evt) - box_parent_ids, evt = prune_empty( - box_parent_ids, map_values=dst_box_id) + box_parent_ids, evt = prune_empty(box_parent_ids, map_values=dst_box_id) prune_events.append(evt) box_levels, evt = prune_empty(box_levels) @@ -1319,17 +1242,19 @@ class TreeBuilder(object): if srcntgts_have_extent: box_srcntgt_counts_nonchild, evt = prune_empty( - box_srcntgt_counts_nonchild) + box_srcntgt_counts_nonchild) prune_events.append(evt) box_has_children, evt = prune_empty(box_has_children) prune_events.append(evt) - box_child_ids, evts = zip(*(prune_empty(ary, map_values=dst_box_id) - for ary in box_child_ids)) + box_child_ids, evts = zip( + *(prune_empty(ary, map_values=dst_box_id) + for ary in box_child_ids)) prune_events.extend(evts) - box_centers, evts = zip(*(prune_empty(ary) for ary in box_centers)) + box_centers, evts = zip( + *(prune_empty(ary) for ary in box_centers)) prune_events.extend(evts) # Update box counts and level start box indices. @@ -1377,13 +1302,9 @@ class TreeBuilder(object): source_numbers = empty(nsrcntgts, particle_id_dtype) fin_debug("source counter") - evt = knl_info.source_counter( - user_srcntgt_ids, - nsources, - source_numbers, - queue=queue, - allocator=allocator, - wait_for=wait_for) + evt = knl_info.source_counter(user_srcntgt_ids, nsources, + source_numbers, queue=queue, allocator=allocator, + wait_for=wait_for) wait_for = [evt] user_source_ids = empty(nsources, particle_id_dtype) @@ -1394,61 +1315,59 @@ class TreeBuilder(object): # need to use zeros because parent boxes won't be initialized box_source_starts, evt = zeros(nboxes_post_prune, particle_id_dtype) wait_for.append(evt) - box_source_counts_cumul, evt = zeros(nboxes_post_prune, - particle_id_dtype) + box_source_counts_cumul, evt = zeros( + nboxes_post_prune, particle_id_dtype) wait_for.append(evt) - box_target_starts, evt = zeros(nboxes_post_prune, particle_id_dtype) + box_target_starts, evt = zeros( + nboxes_post_prune, particle_id_dtype) wait_for.append(evt) - box_target_counts_cumul, evt = zeros(nboxes_post_prune, - particle_id_dtype) + box_target_counts_cumul, evt = zeros( + nboxes_post_prune, particle_id_dtype) wait_for.append(evt) if srcntgts_have_extent: - box_source_counts_nonchild, evt = zeros(nboxes_post_prune, - particle_id_dtype) + box_source_counts_nonchild, evt = zeros( + nboxes_post_prune, particle_id_dtype) wait_for.append(evt) - box_target_counts_nonchild, evt = zeros(nboxes_post_prune, - particle_id_dtype) + box_target_counts_nonchild, evt = zeros( + nboxes_post_prune, particle_id_dtype) wait_for.append(evt) fin_debug("source and target index finder") - evt = knl_info.source_and_target_index_finder( - *( - # input: - ( - user_srcntgt_ids, - nsources, - srcntgt_box_ids, - box_parent_ids, - box_srcntgt_starts, - box_srcntgt_counts_cumul, - source_numbers, - ) + ((box_srcntgt_counts_nonchild, ) - if srcntgts_have_extent else ()) + evt = knl_info.source_and_target_index_finder(*( + # input: + ( + user_srcntgt_ids, nsources, srcntgt_box_ids, + box_parent_ids, - # output: - + ( - user_source_ids, - srcntgt_target_ids, - sorted_target_ids, - box_source_starts, - box_source_counts_cumul, - box_target_starts, - box_target_counts_cumul, - ) + (( - box_source_counts_nonchild, - box_target_counts_nonchild, - ) if srcntgts_have_extent else ())), - queue=queue, - range=slice(nsrcntgts), + box_srcntgt_starts, box_srcntgt_counts_cumul, + source_numbers, + ) + + ((box_srcntgt_counts_nonchild,) + if srcntgts_have_extent else ()) + + # output: + + ( + user_source_ids, srcntgt_target_ids, sorted_target_ids, + box_source_starts, box_source_counts_cumul, + box_target_starts, box_target_counts_cumul, + ) + + (( + box_source_counts_nonchild, + box_target_counts_nonchild, + ) if srcntgts_have_extent else ()) + ), + queue=queue, range=slice(nsrcntgts), wait_for=wait_for) wait_for = [evt] if srcntgts_have_extent: if debug: - assert (box_srcntgt_counts_nonchild.get() == ( - box_source_counts_nonchild + - box_target_counts_nonchild).get()).all() + assert ( + box_srcntgt_counts_nonchild.get() + == + (box_source_counts_nonchild + + box_target_counts_nonchild).get()).all() if debug: usi_host = user_source_ids.get() @@ -1457,13 +1376,13 @@ class TreeBuilder(object): del usi_host sti_host = srcntgt_target_ids.get() - assert (sti_host < nsources + ntargets).all() + assert (sti_host < nsources+ntargets).all() assert (nsources <= sti_host).all() del sti_host - assert (box_source_counts_cumul.get() + - box_target_counts_cumul.get() == - box_srcntgt_counts_cumul.get()).all() + assert (box_source_counts_cumul.get() + + box_target_counts_cumul.get() + == box_srcntgt_counts_cumul.get()).all() del source_numbers @@ -1476,55 +1395,49 @@ class TreeBuilder(object): # {{{ permute and source/target-split (if necessary) particle array if targets is None: - sources = targets = make_obj_array( - [cl.array.empty_like(pt) for pt in srcntgts]) + sources = targets = make_obj_array([ + cl.array.empty_like(pt) for pt in srcntgts]) fin_debug("srcntgt permuter (particles)") evt = knl_info.srcntgt_permuter( - user_srcntgt_ids, - *(tuple(srcntgts) + tuple(sources)), - wait_for=wait_for) + user_srcntgt_ids, + *(tuple(srcntgts) + tuple(sources)), + wait_for=wait_for) wait_for = [evt] assert srcntgt_radii is None else: - sources = make_obj_array( - [empty(nsources, coord_dtype) for i in range(dimensions)]) + sources = make_obj_array([ + empty(nsources, coord_dtype) for i in range(dimensions)]) fin_debug("srcntgt permuter (sources)") evt = knl_info.srcntgt_permuter( - user_source_ids, - *(tuple(srcntgts) + tuple(sources)), - queue=queue, - range=slice(nsources), - wait_for=wait_for) + user_source_ids, + *(tuple(srcntgts) + tuple(sources)), + queue=queue, range=slice(nsources), + wait_for=wait_for) wait_for = [evt] - targets = make_obj_array( - [empty(ntargets, coord_dtype) for i in range(dimensions)]) + targets = make_obj_array([ + empty(ntargets, coord_dtype) for i in range(dimensions)]) fin_debug("srcntgt permuter (targets)") evt = knl_info.srcntgt_permuter( - srcntgt_target_ids, - *(tuple(srcntgts) + tuple(targets)), - queue=queue, - range=slice(ntargets), - wait_for=wait_for) + srcntgt_target_ids, + *(tuple(srcntgts) + tuple(targets)), + queue=queue, range=slice(ntargets), + wait_for=wait_for) wait_for = [evt] if srcntgt_radii is not None: fin_debug("srcntgt permuter (source radii)") source_radii = cl.array.take( - srcntgt_radii, - user_source_ids, - queue=queue, - wait_for=wait_for) + srcntgt_radii, user_source_ids, queue=queue, + wait_for=wait_for) fin_debug("srcntgt permuter (target radii)") target_radii = cl.array.take( - srcntgt_radii, - srcntgt_target_ids, - queue=queue, - wait_for=wait_for) + srcntgt_radii, srcntgt_target_ids, queue=queue, + wait_for=wait_for) wait_for = source_radii.events + target_radii.events @@ -1539,7 +1452,7 @@ class TreeBuilder(object): nlevels = len(level_start_box_nrs) - 1 assert nlevels == len(level_used_box_counts) - assert level + 1 == nlevels, (level + 1, nlevels) + assert level + 1 == nlevels, (level+1, nlevels) if debug: max_level = np.max(box_levels.get()) assert max_level + 1 == nlevels @@ -1549,10 +1462,9 @@ class TreeBuilder(object): # A number of arrays below are nominally 2-dimensional and stored with # the box index as the fastest-moving index. To make sure that accesses # remain aligned, we round up the number of boxes used for indexing. - aligned_nboxes = div_ceil(nboxes_post_prune, 32) * 32 + aligned_nboxes = div_ceil(nboxes_post_prune, 32)*32 - box_child_ids_new, evt = zeros((2**dimensions, aligned_nboxes), - box_id_dtype) + box_child_ids_new, evt = zeros((2**dimensions, aligned_nboxes), box_id_dtype) wait_for.append(evt) box_centers_new = empty((dimensions, aligned_nboxes), coord_dtype) @@ -1562,8 +1474,7 @@ class TreeBuilder(object): wait_for.extend(box_child_ids_new.events) for dim, center_row in enumerate(box_centers): - box_centers_new[dim, : - nboxes_post_prune] = center_row[:nboxes_post_prune] + box_centers_new[dim, :nboxes_post_prune] = center_row[:nboxes_post_prune] wait_for.extend(box_centers_new.events) cl.wait_for_events(wait_for) @@ -1607,38 +1518,33 @@ class TreeBuilder(object): # }}} - box_source_counts_nonchild, evt = zeros(nboxes_post_prune, - particle_id_dtype) + box_source_counts_nonchild, evt = zeros( + nboxes_post_prune, particle_id_dtype) wait_for.append(evt) if sources_are_targets: box_target_counts_nonchild = box_source_counts_nonchild else: - box_target_counts_nonchild, evt = zeros(nboxes_post_prune, - particle_id_dtype) + box_target_counts_nonchild, evt = zeros( + nboxes_post_prune, particle_id_dtype) wait_for.append(evt) fin_debug("compute box info") evt = knl_info.box_info_kernel( - *( - # input: - box_parent_ids, - box_srcntgt_counts_cumul, - box_source_counts_cumul, - box_target_counts_cumul, - box_has_children, - box_levels, - nlevels, - - # output if srcntgts_have_extent, input+output otherwise - box_source_counts_nonchild, - box_target_counts_nonchild, + *( + # input: + box_parent_ids, box_srcntgt_counts_cumul, + box_source_counts_cumul, box_target_counts_cumul, + box_has_children, box_levels, nlevels, - # output: - box_flags, - ), - range=slice(nboxes_post_prune), - wait_for=wait_for) + # output if srcntgts_have_extent, input+output otherwise + box_source_counts_nonchild, box_target_counts_nonchild, + + # output: + box_flags, + ), + range=slice(nboxes_post_prune), + wait_for=wait_for) # }}} @@ -1656,42 +1562,52 @@ class TreeBuilder(object): logger.info("tree build complete") return Tree( - # If you change this, also change the documentation - # of what's in the tree, above. - sources_are_targets=sources_are_targets, - sources_have_extent=sources_have_extent, - targets_have_extent=targets_have_extent, - particle_id_dtype=knl_info.particle_id_dtype, - box_id_dtype=knl_info.box_id_dtype, - coord_dtype=coord_dtype, - box_level_dtype=self.box_level_dtype, - root_extent=root_extent, - stick_out_factor=stick_out_factor, - extent_norm=srcntgts_extent_norm, - bounding_box=(bbox_min, bbox_max), - level_start_box_nrs=level_start_box_nrs, - level_start_box_nrs_dev=level_start_box_nrs_dev, - sources=sources, - targets=targets, - box_source_starts=box_source_starts, - box_source_counts_nonchild=box_source_counts_nonchild, - box_source_counts_cumul=box_source_counts_cumul, - box_target_starts=box_target_starts, - box_target_counts_nonchild=box_target_counts_nonchild, - box_target_counts_cumul=box_target_counts_cumul, - box_parent_ids=box_parent_ids, - box_child_ids=box_child_ids, - box_centers=box_centers, - box_levels=box_levels, - box_flags=box_flags, - user_source_ids=user_source_ids, - sorted_target_ids=sorted_target_ids, - _is_pruned=prune_empty_leaves, - **extra_tree_attrs).with_queue(None), evt + # If you change this, also change the documentation + # of what's in the tree, above. + + sources_are_targets=sources_are_targets, + sources_have_extent=sources_have_extent, + targets_have_extent=targets_have_extent, + + particle_id_dtype=knl_info.particle_id_dtype, + box_id_dtype=knl_info.box_id_dtype, + coord_dtype=coord_dtype, + box_level_dtype=self.box_level_dtype, + + root_extent=root_extent, + stick_out_factor=stick_out_factor, + extent_norm=srcntgts_extent_norm, + + bounding_box=(bbox_min, bbox_max), + level_start_box_nrs=level_start_box_nrs, + level_start_box_nrs_dev=level_start_box_nrs_dev, + + sources=sources, + targets=targets, + + box_source_starts=box_source_starts, + box_source_counts_nonchild=box_source_counts_nonchild, + box_source_counts_cumul=box_source_counts_cumul, + box_target_starts=box_target_starts, + box_target_counts_nonchild=box_target_counts_nonchild, + box_target_counts_cumul=box_target_counts_cumul, + + box_parent_ids=box_parent_ids, + box_child_ids=box_child_ids, + box_centers=box_centers, + box_levels=box_levels, + box_flags=box_flags, + + user_source_ids=user_source_ids, + sorted_target_ids=sorted_target_ids, + + _is_pruned=prune_empty_leaves, + + **extra_tree_attrs + ).with_queue(None), evt # }}} # }}} - # vim: foldmethod=marker:filetype=pyopencl -- GitLab From d1bce1dcc1206ded0880d861d28387a925a51313 Mon Sep 17 00:00:00 2001 From: xywei Date: Thu, 8 Feb 2018 10:03:49 -0600 Subject: [PATCH 07/16] Re-apply changes, with minimal diff --- boxtree/tree_build.py | 51 +++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 2566dc4..b3438ef 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -78,7 +78,7 @@ class TreeBuilder(object): targets=None, source_radii=None, target_radii=None, stick_out_factor=None, refine_weights=None, max_leaf_refine_weight=None, wait_for=None, - extent_norm=None, + extent_norm=None, bbox=None, **kwargs): """ :arg queue: a :class:`pyopencl.CommandQueue` instance @@ -118,6 +118,10 @@ class TreeBuilder(object): execution. :arg extent_norm: ``"l2"`` or ``"linf"``. Indicates the norm with respect to which particle stick-out is measured. See :attr:`Tree.extent_norm`. + :arg bbox: Bounding box of the same type as returned by + *boxtree.bounding_box.make_bounding_box_dtype*. + When given, this bounding box is used for tree + building. Otherwise, the bounding box is determined from particles. :arg kwargs: Used internally for debugging. :returns: a tuple ``(tree, event)``, where *tree* is an instance of @@ -331,22 +335,45 @@ class TreeBuilder(object): # {{{ find and process bounding box - bbox, _ = self.bbox_finder(srcntgts, srcntgt_radii, wait_for=wait_for) - bbox = bbox.get() + if bbox is None: + bbox, _ = self.bbox_finder(srcntgts, srcntgt_radii, wait_for=wait_for) + bbox = bbox.get() - root_extent = max( + root_extent = max( bbox["max_"+ax] - bbox["min_"+ax] for ax in axis_names) * (1+TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) - # make bbox square and slightly larger at the top, to ensure scaled - # coordinates are always < 1 - bbox_min = np.empty(dimensions, coord_dtype) - for i, ax in enumerate(axis_names): - bbox_min[i] = bbox["min_"+ax] + # make bbox square and slightly larger at the top, to ensure scaled + # coordinates are always < 1 + bbox_min = np.empty(dimensions, coord_dtype) + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_"+ax] - bbox_max = bbox_min + root_extent - for i, ax in enumerate(axis_names): - bbox["max_"+ax] = bbox_max[i] + bbox_max = bbox_min + root_extent + for i, ax in enumerate(axis_names): + bbox["max_"+ax] = bbox_max[i] + else: + # Validate that bbox is a superset of particle-derived bbox + bbox_auto, _ = self.bbox_finder( + srcntgts, srcntgt_radii, wait_for=wait_for) + bbox_auto = bbox_auto.get() + + bbox_min = np.empty(dimensions, coord_dtype) + bbox_max = np.empty(dimensions, coord_dtype) + + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_" + ax] + bbox_max[i] = bbox["max_" + ax] + assert bbox_min[i] < bbox_max[i] + assert bbox_min[i] <= bbox_auto["min_" + ax] + assert bbox_max[i] >= bbox_auto["max_" + ax] + + # bbox must be a square + bbox_exts = bbox_max - bbox_min + for ext in bbox_exts: + assert abs(ext - bbox_exts[0]) < 1e-15 + + root_extent = bbox_exts[0] # }}} -- GitLab From b777717dce3e52e878558bae3fb560b592f8a39b Mon Sep 17 00:00:00 2001 From: xywei Date: Sun, 17 Jun 2018 21:26:39 +0800 Subject: [PATCH 08/16] Add uniform boxtree --- boxtree/tree_interactive_build.py | 258 +++++++++++++++++++++++++++ boxtree/visualization.py | 94 ++++++++++ examples/interactive_boxtree_demo.py | 26 +++ 3 files changed, 378 insertions(+) create mode 100644 boxtree/tree_interactive_build.py create mode 100644 examples/interactive_boxtree_demo.py diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py new file mode 100644 index 0000000..0f3ab50 --- /dev/null +++ b/boxtree/tree_interactive_build.py @@ -0,0 +1,258 @@ +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2018 Xiaoyu Wei" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" +import numpy as np +from itertools import product + +import pyopencl as cl +import pyopencl.array # noqa +from pytools.obj_array import make_obj_array +from boxtree.tools import DeviceDataRecord + +# {{{ Data structure for a tree of boxes + +class BoxTree(DeviceDataRecord): + r"""A quad/oct-tree tree consisting of a hierarchy of boxes. + The intended use for this data structure is to generate particles + as quadrature points in each leaf box. Those particles can then + be used to build a Tree object and then generate the Traversal for + FMM. + + This class is designed such that it is easy to adaptively modify + the tree structure (refine/coarsen). Unlike flattened Tree class, + it handles itself and does not rely on external tree builders. + + .. ------------------------------------------------------------------------ + .. rubric:: Data types + .. ------------------------------------------------------------------------ + + .. attribute:: box_id_dtype + .. attribute:: coord_dtype + .. attribute:: box_level_dtype + + .. ------------------------------------------------------------------------ + .. rubric:: Counts and sizes + .. ------------------------------------------------------------------------ + + .. attribute:: root_extent + + the root box size, a scalar + + .. attribute:: nlevels + + .. attribute:: nboxes + + Can be larger than the actual number of boxes, since the box ids of + purged boxes during coarsening are not reused. + + .. attribute:: nboxes_level + + ``size_t [nlevels]`` + + Can be larger than the actual number of boxes at each level due to + the same reason for nboxes. + + .. attribute:: n_active_boxes + + .. ------------------------------------------------------------------------ + .. rubric:: Box properties + .. ------------------------------------------------------------------------ + + .. attribute:: box_centers + + ``coord_t [dimensions, nboxes]`` + + (C order, 'structure of arrays') + + .. attribute:: box_levels + + :attr:`box_level_dtype` ``box_level_t [nboxes]`` + + .. attribute:: box_is_active + + :attr:`bool` ``bool [nboxes]`` + + FIXME: pyopencl cannot map 'bool'. Resort to use an int for now. + + .. ------------------------------------------------------------------------ + .. rubric:: Structural properties + .. ------------------------------------------------------------------------ + + .. attribute:: level_boxes + + ``numpy.ndarray [nlevels]`` + ``box_id_t [nlevels][nboxes_level[level]]`` + + A :class:`numpy.ndarray` of box ids at each level. It acts as an + inverse lookup table of box_levels. The outer layer is an object + array to account for variable lengths at each level. + + .. attribute:: box_parent_ids + + ``box_id_t [nboxes]`` + + Box 0 (the root) has 0 as its parent. + + .. attribute:: box_child_ids + + ``box_id_t [2**dimensions, nboxes]`` + + (C order, 'structure of arrays') + + "0" is used as a 'no child' marker, as the root box can never + occur as any box's child. Boxes with no child are called active + boxes. + + .. attribute:: active_boxes + + ``box_id_t [n_active_boxes]`` + """ + + def __init__(self, queue, + root_vertex=np.zeros(2), + root_extent=1, + nlevels=1, + box_id_dtype=np.int32, + box_level_dtype=np.int32, + coord_dtype=np.float64): + """A plain boxtree with uniform levels (a complete tree). + The root box is given its vertex with the smallest coordinates + and its extent. + """ + self.size_t = np.int32 + self.box_id_dtype = box_id_dtype + self.box_level_dtype = box_level_dtype + self.coord_dtype = coord_dtype + + dim = len(root_vertex) + self.root_extent = root_extent + + self.nboxes_level = cl.array.to_device(queue, + np.array( + [1 << (dim * l) for l in range(nlevels)], + dtype=self.size_t)) + nboxes = self.size_t(cl.array.sum(self.nboxes_level).get()) + + self.box_levels = cl.array.zeros(queue, nboxes, box_level_dtype) + level_start = 0 + for l in range(nlevels): + offset = self.size_t(self.nboxes_level[l].get()) + self.box_levels[level_start:level_start + offset] = l + level_start += offset + + self.box_centers = cl.array.zeros(queue, (dim, nboxes), coord_dtype) + ibox = 0 + for l in range(nlevels): + dx = self.root_extent / (1 << l) + for cid in product(range(1 << l), repeat=dim): + for d in range(dim): + self.box_centers[d, ibox] = cid[d] * dx + ( + dx / 2 + root_vertex[d]) + ibox += 1 + + n_active_boxes = self.size_t(self.nboxes_level[nlevels - 1].get()) + self.active_boxes = cl.array.to_device(queue, + np.array( + range(nboxes - n_active_boxes, nboxes), + dtype=self.box_id_dtype)) + + # FIXME: map bool in pyopencl + # pyopencl/compyte/dtypes.py", line 107, in dtype_to_ctype + # raise ValueError("unable to map dtype '%s'" % dtype) + # ValueError: unable to map dtype 'bool' + self.box_is_active = cl.array.zeros(queue, nboxes, np.int32) + self.box_is_active[nboxes - n_active_boxes:] = 1 + + self.level_boxes = make_obj_array([ + cl.array.zeros(queue, 1 << (dim * l), self.box_id_dtype) + for l in range(nlevels)]) + ibox = 0 + for l in range(nlevels): + for b in range(len(self.level_boxes[l])): + self.level_boxes[l][b] = ibox + ibox += 1 + + self.box_parent_ids = cl.array.zeros(queue, nboxes, self.box_id_dtype) + self.box_child_ids = cl.array.zeros(queue, (1 << dim, nboxes), + self.box_id_dtype) + for l in range(nlevels): + if l == 0: + self.box_parent_ids[0] = 0 + else: + multi_index_bases = tuple( + 1 << ((dim - 1 - d) * (l-1)) for d in range(dim)) + for ilb, multi_ind in zip(range(len(self.level_boxes[l])), + product(range(1 << l), repeat=dim)): + ibox = self.box_id_dtype(self.level_boxes[l][ilb].get()) + parent_multi_ind = tuple(ind // 2 for ind in multi_ind) + parent_level_id = np.sum([ind * base for ind, base + in zip(parent_multi_ind, multi_index_bases)]) + self.box_parent_ids[ibox] = self.level_boxes[ + l-1][parent_level_id] + + child_multi_index_bases = tuple( + 1 << (dim - 1 - d) for d in range(dim)) + child_multi_ind = tuple(ind - pind * 2 for ind, pind + in zip(multi_ind, parent_multi_ind)) + child_id = np.sum([ind * base for ind, base + in zip(child_multi_ind, child_multi_index_bases)]) + self.box_child_ids[child_id][self.box_id_dtype( + self.level_boxes[l-1][parent_level_id].get()) + ] = ibox + + @property + def dimensions(self): + return len(self.box_centers) + + @property + def nboxes(self): + return len(self.box_levels) + + @property + def nlevels(self): + return len(self.level_boxes) + + @property + def n_active_boxes(self): + return len(self.active_boxes) + + fields = ["box_centers"] + + def plot(self, **kwargs): + from boxtree.visualization import BoxTreePlotter + plotter = BoxTreePlotter(self) + plotter.draw_tree(**kwargs) + plotter.set_bounding_box() + + def get_box_extent(self, ibox): + if isinstance(ibox, cl.array.Array): + ibox = self.box_id_dtype(ibox.get()) + lev = self.box_level_dtype(self.box_levels[ibox].get()) + box_size = self.root_extent / (1 << lev) + extent_low = np.zeros(self.dimensions, self.coord_dtype) + for d in range(self.dimensions): + extent_low[d] = self.box_centers[d, ibox].get() - 0.5 * box_size + extent_high = extent_low + box_size + return extent_low, extent_high + +# }}} End Data structure for a tree of boxes diff --git a/boxtree/visualization.py b/boxtree/visualization.py index c487a96..f9dbbb0 100644 --- a/boxtree/visualization.py +++ b/boxtree/visualization.py @@ -172,6 +172,100 @@ class TreePlotter: # }}} +# {{{ box tree plotting + +class BoxTreePlotter(TreePlotter): + """Assumes that the tree has data living on the device. + Only plots the 2D trees. + """ + + def __init__(self, tree): + self.tree = tree + + def draw_tree(self, **kwargs): + if self.tree.dimensions != 2: + raise NotImplementedError("can only plot 2D trees for now") + + fill = kwargs.pop("fill", False) + edgecolor = kwargs.pop("edgecolor", "black") + kwargs["fill"] = fill + kwargs["edgecolor"] = edgecolor + + for iabox in range(self.tree.n_active_boxes): + ibox = self.tree.box_id_dtype(self.tree.active_boxes[iabox].get()) + self.draw_box(ibox, **kwargs) + + def set_bounding_box(self): + import matplotlib.pyplot as pt + root_center = np.array([self.tree.box_centers[d, 0].get() for d + in range(self.tree.dimensions)]) + root_extent = self.tree.root_extent + pt.xlim(root_center[0] - root_extent / 2, + root_center[0] + root_extent / 2) + pt.ylim(root_center[1] - root_extent / 2, + root_center[1] + root_extent / 2) + + pt.gca().set_aspect("equal") + + def draw_box_numbers(self): + import matplotlib.pyplot as pt + + tree = self.tree + + for iabox in range(tree.n_active_boxes): + ibox = tree.box_id_dtype(tree.active_boxes[iabox].get()) + x, y = tree.box_centers[:, ibox] + lev = int(tree.box_levels[ibox]) + pt.text(x, y, str(ibox), fontsize=20*1.15**(-lev), + ha="center", va="center", + bbox=dict(facecolor='white', alpha=0.5, lw=0)) + + def get_tikz_for_tree(self): + if self.tree.dimensions != 2: + raise NotImplementedError("can only plot 2D trees for now") + + lines = [] + + lines.append(r"\def\nboxes{%d}" % self.tree.nboxes) + lines.append(r"\def\lastboxnr{%d}" % (self.tree.nboxes-1)) + + for iabox in range(self.tree.n_active_boxes): + ibox = self.tree.box_id_dtype(self.tree.active_boxes[iabox].get()) + el, eh = self.tree.get_box_extent(ibox) + + c = self.tree.box_centers[:, ibox] + + lines.append( + r"\coordinate (boxl%d) at (%r, %r);" + % (ibox, float(el[0]), float(el[1]))) + lines.append( + r"\coordinate (boxh%d) at (%r, %r);" + % (ibox, float(eh[0]), float(eh[1]))) + lines.append( + r"\coordinate (boxc%d) at (%r, %r);" + % (ibox, float(c[0]), float(c[1]))) + lines.append( + r"\def\boxsize%s{%r}" + % (int_to_roman(ibox), float(eh[0]-el[0]))) + lines.append( + r"\def\boxlevel%s{%r}" + % (int_to_roman(ibox), self.tree.box_levels[ibox])) + + lines.append( + r"\def\boxpath#1{(boxl#1) rectangle (boxh#1)}") + lines.append( + r"\def\drawboxes{" + r"\foreach \ibox in {0,...,\lastboxnr}{" + r"\draw \boxpath{\ibox};" + r"}}") + lines.append( + r"\def\drawboxnrs{" + r"\foreach \ibox in {0,...,\lastboxnr}{" + r"\node [font=\tiny] at (boxc\ibox) {\ibox};" + r"}}") + return "\n".join(lines) + +# }}} # {{{ traversal plotting diff --git a/examples/interactive_boxtree_demo.py b/examples/interactive_boxtree_demo.py new file mode 100644 index 0000000..576315a --- /dev/null +++ b/examples/interactive_boxtree_demo.py @@ -0,0 +1,26 @@ +import pyopencl as cl +import boxtree.tree_interactive_build + +import matplotlib.pyplot as pt + +ctx = cl.create_some_context() +queue = cl.CommandQueue(ctx) + +tree = boxtree.tree_interactive_build.BoxTree(queue, nlevels=4) + +tree.plot() + +pt.tick_params( + axis='x', # changes apply to the x-axis + which='both', # both major and minor ticks are affected + bottom='off', # ticks along the bottom edge are off + top='off', # ticks along the top edge are off + labelbottom='off') +pt.tick_params( + axis='y', + which='both', + left='off', + top='off', + labelleft='off') + +pt.show() -- GitLab From 6abfae2cf5e5bec699fc55ffd4539fd58bcdc7ae Mon Sep 17 00:00:00 2001 From: xywei Date: Mon, 18 Jun 2018 11:38:36 +0800 Subject: [PATCH 09/16] Improve boxtree plotting --- boxtree/tree_interactive_build.py | 48 ++++++++++++++++++++++++---- boxtree/visualization.py | 12 +++---- examples/interactive_boxtree_demo.py | 10 ++++-- 3 files changed, 55 insertions(+), 15 deletions(-) diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py index 0f3ab50..d6a6e24 100644 --- a/boxtree/tree_interactive_build.py +++ b/boxtree/tree_interactive_build.py @@ -127,8 +127,27 @@ class BoxTree(DeviceDataRecord): ``box_id_t [n_active_boxes]`` """ - - def __init__(self, queue, + def get_copy_kwargs(self, **kwargs): + # cl arrays + for f in self.__class__.fields: + if f not in kwargs: + try: + kwargs[f] = getattr(self, f) + except AttributeError: + pass + + # others + kwargs.update({ + "size_t":self.size_t, + "box_id_dtype":self.box_id_dtype, + "box_level_dtype":self.box_level_dtype, + "coord_dtype":self.coord_dtype, + "root_extent":self.root_extent, + }) + + return kwargs + + def generate_uniform_boxtree(self, queue, root_vertex=np.zeros(2), root_extent=1, nlevels=1, @@ -150,7 +169,9 @@ class BoxTree(DeviceDataRecord): self.nboxes_level = cl.array.to_device(queue, np.array( [1 << (dim * l) for l in range(nlevels)], - dtype=self.size_t)) + dtype=self.size_t)) + self.register_fields({"nboxes_level":self.nboxes_level}) + nboxes = self.size_t(cl.array.sum(self.nboxes_level).get()) self.box_levels = cl.array.zeros(queue, nboxes, box_level_dtype) @@ -159,6 +180,7 @@ class BoxTree(DeviceDataRecord): offset = self.size_t(self.nboxes_level[l].get()) self.box_levels[level_start:level_start + offset] = l level_start += offset + self.register_fields({"box_levels":self.box_levels}) self.box_centers = cl.array.zeros(queue, (dim, nboxes), coord_dtype) ibox = 0 @@ -169,12 +191,14 @@ class BoxTree(DeviceDataRecord): self.box_centers[d, ibox] = cid[d] * dx + ( dx / 2 + root_vertex[d]) ibox += 1 + self.register_fields({"box_centers":self.box_centers}) n_active_boxes = self.size_t(self.nboxes_level[nlevels - 1].get()) self.active_boxes = cl.array.to_device(queue, np.array( range(nboxes - n_active_boxes, nboxes), dtype=self.box_id_dtype)) + self.register_fields({"active_boxes":self.active_boxes}) # FIXME: map bool in pyopencl # pyopencl/compyte/dtypes.py", line 107, in dtype_to_ctype @@ -182,6 +206,7 @@ class BoxTree(DeviceDataRecord): # ValueError: unable to map dtype 'bool' self.box_is_active = cl.array.zeros(queue, nboxes, np.int32) self.box_is_active[nboxes - n_active_boxes:] = 1 + self.register_fields({"box_is_active":self.box_is_active}) self.level_boxes = make_obj_array([ cl.array.zeros(queue, 1 << (dim * l), self.box_id_dtype) @@ -191,6 +216,7 @@ class BoxTree(DeviceDataRecord): for b in range(len(self.level_boxes[l])): self.level_boxes[l][b] = ibox ibox += 1 + self.register_fields({"level_boxes":self.level_boxes}) self.box_parent_ids = cl.array.zeros(queue, nboxes, self.box_id_dtype) self.box_child_ids = cl.array.zeros(queue, (1 << dim, nboxes), @@ -219,6 +245,9 @@ class BoxTree(DeviceDataRecord): self.box_child_ids[child_id][self.box_id_dtype( self.level_boxes[l-1][parent_level_id].get()) ] = ibox + self.register_fields({ + "box_parent_ids":self.box_parent_ids, + "box_child_ids":self.box_child_ids}) @property def dimensions(self): @@ -236,8 +265,6 @@ class BoxTree(DeviceDataRecord): def n_active_boxes(self): return len(self.active_boxes) - fields = ["box_centers"] - def plot(self, **kwargs): from boxtree.visualization import BoxTreePlotter plotter = BoxTreePlotter(self) @@ -247,11 +274,18 @@ class BoxTree(DeviceDataRecord): def get_box_extent(self, ibox): if isinstance(ibox, cl.array.Array): ibox = self.box_id_dtype(ibox.get()) - lev = self.box_level_dtype(self.box_levels[ibox].get()) + lev = self.box_level_dtype(self.box_levels[ibox].get()) + else: + lev = self.box_level_dtype(self.box_levels[ibox]) box_size = self.root_extent / (1 << lev) extent_low = np.zeros(self.dimensions, self.coord_dtype) for d in range(self.dimensions): - extent_low[d] = self.box_centers[d, ibox].get() - 0.5 * box_size + if isinstance(self.box_centers[0], cl.array.Array): + extent_low[d] = self.box_centers[ + d, ibox].get() - 0.5 * box_size + else: + extent_low[d] = self.box_centers[d, ibox] - 0.5 * box_size + extent_high = extent_low + box_size return extent_low, extent_high diff --git a/boxtree/visualization.py b/boxtree/visualization.py index f9dbbb0..a891265 100644 --- a/boxtree/visualization.py +++ b/boxtree/visualization.py @@ -175,8 +175,8 @@ class TreePlotter: # {{{ box tree plotting class BoxTreePlotter(TreePlotter): - """Assumes that the tree has data living on the device. - Only plots the 2D trees. + """Assumes that the tree has data living on the host. + See :meth:`boxtree.BoxTree.get`. """ def __init__(self, tree): @@ -192,12 +192,12 @@ class BoxTreePlotter(TreePlotter): kwargs["edgecolor"] = edgecolor for iabox in range(self.tree.n_active_boxes): - ibox = self.tree.box_id_dtype(self.tree.active_boxes[iabox].get()) + ibox = self.tree.box_id_dtype(self.tree.active_boxes[iabox]) self.draw_box(ibox, **kwargs) def set_bounding_box(self): import matplotlib.pyplot as pt - root_center = np.array([self.tree.box_centers[d, 0].get() for d + root_center = np.array([self.tree.box_centers[d, 0] for d in range(self.tree.dimensions)]) root_extent = self.tree.root_extent pt.xlim(root_center[0] - root_extent / 2, @@ -213,7 +213,7 @@ class BoxTreePlotter(TreePlotter): tree = self.tree for iabox in range(tree.n_active_boxes): - ibox = tree.box_id_dtype(tree.active_boxes[iabox].get()) + ibox = tree.box_id_dtype(tree.active_boxes[iabox]) x, y = tree.box_centers[:, ibox] lev = int(tree.box_levels[ibox]) pt.text(x, y, str(ibox), fontsize=20*1.15**(-lev), @@ -230,7 +230,7 @@ class BoxTreePlotter(TreePlotter): lines.append(r"\def\lastboxnr{%d}" % (self.tree.nboxes-1)) for iabox in range(self.tree.n_active_boxes): - ibox = self.tree.box_id_dtype(self.tree.active_boxes[iabox].get()) + ibox = self.tree.box_id_dtype(self.tree.active_boxes[iabox]) el, eh = self.tree.get_box_extent(ibox) c = self.tree.box_centers[:, ibox] diff --git a/examples/interactive_boxtree_demo.py b/examples/interactive_boxtree_demo.py index 576315a..69be583 100644 --- a/examples/interactive_boxtree_demo.py +++ b/examples/interactive_boxtree_demo.py @@ -6,9 +6,15 @@ import matplotlib.pyplot as pt ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) -tree = boxtree.tree_interactive_build.BoxTree(queue, nlevels=4) +tree = boxtree.tree_interactive_build.BoxTree() +tree.generate_uniform_boxtree(queue, nlevels=4) -tree.plot() +# call get() before plotting +from boxtree.visualization import BoxTreePlotter +plt = BoxTreePlotter(tree.get(queue)) +plt.draw_tree() +plt.set_bounding_box() +plt.draw_box_numbers() pt.tick_params( axis='x', # changes apply to the x-axis -- GitLab From 4e7aa05cb008c644db323b973333216c00313bc7 Mon Sep 17 00:00:00 2001 From: xywei Date: Tue, 19 Jun 2018 09:59:46 +0800 Subject: [PATCH 10/16] Implement getters for centers and measures --- boxtree/tree_interactive_build.py | 139 ++++++++++++++++++++++++++- examples/interactive_boxtree_demo.py | 10 +- 2 files changed, 147 insertions(+), 2 deletions(-) diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py index d6a6e24..6af7339 100644 --- a/boxtree/tree_interactive_build.py +++ b/boxtree/tree_interactive_build.py @@ -24,6 +24,7 @@ THE SOFTWARE. import numpy as np from itertools import product +import loopy as lp import pyopencl as cl import pyopencl.array # noqa from pytools.obj_array import make_obj_array @@ -204,7 +205,7 @@ class BoxTree(DeviceDataRecord): # pyopencl/compyte/dtypes.py", line 107, in dtype_to_ctype # raise ValueError("unable to map dtype '%s'" % dtype) # ValueError: unable to map dtype 'bool' - self.box_is_active = cl.array.zeros(queue, nboxes, np.int32) + self.box_is_active = cl.array.zeros(queue, nboxes, np.int8) self.box_is_active[nboxes - n_active_boxes:] = 1 self.register_fields({"box_is_active":self.box_is_active}) @@ -290,3 +291,139 @@ class BoxTree(DeviceDataRecord): return extent_low, extent_high # }}} End Data structure for a tree of boxes + +# {{{ Discretize a BoxTree using quadrature + +class QuadratureOnBoxTree(object): + """Tensor-product quadrature rules applied to each active box. + + This class only holds data on template quadrature rules, while it + responds to queries for: + - quadrature nodes + - quadrature weights + - active box centers + - active box measures (areas/volumes) + + The information is generated on the fly and is responsive to + changes made to the underlying BoxTree object. + + .. attribute:: quadrature_formula + + The 1D template quadrature formula defined on [-1, 1]. + + .. attribute:: boxtree + + The underlying BoxTree object, assumed to reside on the device, + i.e., its get() method was not called. + """ + + def __init__(self, boxtree, quadrature_formula=None): + """If no quadrature_formula is supplied, the trivial one is used + sum[ f(centers) * measures ] + """ + if not isinstance(boxtree, BoxTree): + raise RuntimeError("Invalid boxtree") + + self.boxtree = boxtree + + if quadrature_formula is None: + from modepy import GaussLegendreQuadrature + self.quadrature_formula = GaussLegendreQuadrature(0) + else: + self.quadrature_formula = quadrature_formula + + def get_q_points(self): + """Return quadrature points. + """ + pass + + def get_q_weights(self): + """Return quadrature weights. + """ + pass + + def get_cell_centers(self, queue): + """Return centers of active boxes. + """ + def get_knl(): + knl = lp.make_kernel( + "{[iabox, iaxis]: 0<=iabox box_id = active_boxes[iabox] + result[iaxis, iabox] = box_centers[iaxis, box_id] + """, + [ + lp.GlobalArg("result", self.boxtree.coord_dtype, + shape=lp.auto), + lp.GlobalArg("active_boxes", self.boxtree.box_id_dtype, + shape=lp.auto), + lp.GlobalArg("box_centers", self.boxtree.coord_dtype, + shape="(dims, nboxes)"), + lp.ValueArg("n_active_boxes", self.boxtree.size_t), + lp.ValueArg("dims", self.boxtree.size_t), + lp.ValueArg("nboxes", self.boxtree.size_t), + ], + name="get_active_box_centers") + + knl = lp.split_iname(knl, + "iabox", 128, outer_tag="g.0", inner_tag="l.0") + + return knl + + evt, result = get_knl()(queue, + active_boxes=self.boxtree.active_boxes, + box_centers=self.boxtree.box_centers, + n_active_boxes=self.boxtree.n_active_boxes, + dims=self.boxtree.dimensions, + nboxes=self.boxtree.nboxes) + + assert len(result) == 1 + return result[0] + + def get_cell_measures(self, queue): + """Return measures of active boxes. + This method does not call BoxTree.get_box_extent() for performance + reasons. + """ + def get_knl(): + knl = lp.make_kernel( + "{[iabox]: 0<=iabox box_id = active_boxes[iabox] + <> box_level = box_levels[box_id] + result[iabox] = root_measure / (2 ** (box_level * dims)) + """, + [ + lp.GlobalArg("result", self.boxtree.coord_dtype, + shape=lp.auto), + lp.GlobalArg("active_boxes", self.boxtree.box_id_dtype, + shape=lp.auto), + lp.GlobalArg("box_levels", self.boxtree.box_level_dtype, + shape="nboxes"), + lp.ValueArg("root_measure", self.boxtree.coord_dtype), + lp.ValueArg("n_active_boxes", self.boxtree.size_t), + lp.ValueArg("dims", self.boxtree.size_t), + lp.ValueArg("nboxes", self.boxtree.size_t), + ], + name="get_active_box_measures") + + knl = lp.split_iname(knl, + "iabox", 128, outer_tag="g.0", inner_tag="l.0") + + return knl + + evt, result = get_knl()(queue, + active_boxes=self.boxtree.active_boxes, + box_levels=self.boxtree.box_levels, + root_measure=( + self.boxtree.root_extent ** self.boxtree.dimensions), + n_active_boxes=self.boxtree.n_active_boxes, + dims=self.boxtree.dimensions, + nboxes=self.boxtree.nboxes) + + assert len(result) == 1 + return result[0] + + +# }}} End Discretize a BoxTree using quadrature diff --git a/examples/interactive_boxtree_demo.py b/examples/interactive_boxtree_demo.py index 69be583..134e570 100644 --- a/examples/interactive_boxtree_demo.py +++ b/examples/interactive_boxtree_demo.py @@ -2,12 +2,20 @@ import pyopencl as cl import boxtree.tree_interactive_build import matplotlib.pyplot as pt +import numpy as np ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) tree = boxtree.tree_interactive_build.BoxTree() -tree.generate_uniform_boxtree(queue, nlevels=4) +tree.generate_uniform_boxtree(queue, nlevels=3) + +quad = boxtree.tree_interactive_build.QuadratureOnBoxTree(tree) +cell_centers = quad.get_cell_centers(queue) + +cell_measures = quad.get_cell_measures(queue) +print(cell_measures) +print(np.sum(cell_measures.get())) # call get() before plotting from boxtree.visualization import BoxTreePlotter -- GitLab From b2bbe01282d4ca7520a7efa9e66bc97c1a5e51a9 Mon Sep 17 00:00:00 2001 From: xywei Date: Tue, 19 Jun 2018 17:00:56 +0800 Subject: [PATCH 11/16] Adding quad info --- boxtree/tree_interactive_build.py | 104 +++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 1 deletion(-) diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py index 6af7339..8775124 100644 --- a/boxtree/tree_interactive_build.py +++ b/boxtree/tree_interactive_build.py @@ -335,12 +335,114 @@ class QuadratureOnBoxTree(object): def get_q_points(self): """Return quadrature points. """ + q_nodes = cl.array.to_device(queue, self.quadrature_formula.nodes) + + def get_knl(dim): + quad_vars = ["iq%d" % i for i in range(dim)] + knl = lp.make_kernel( + "{[iabox, iaxis, QUAD_VARS]: 0<=iabox box_id = active_boxes[iabox] + <> box_level = box_levels[box_id] + <> box_relative_extent = root_extent / 2 ** (box_level+1) + result[iaxis, QUAD_VARS, iabox] = box_centers[iaxis] + ( + if(iaxis==0, q_nodes[iq0], + if(iaxis==1, q_nodes[iq1], q_nodes[iq2]) + ) * box_relative_extent) + """ + .replace("QUAD_VARS", ", ".join(quad_vars)), + [ + lp.GlobalArg("result", self.boxtree.coord_dtype, + shape=lp.auto), + lp.GlobalArg("active_boxes", self.boxtree.box_id_dtype, + shape=lp.auto), + lp.GlobalArg("box_levels", self.boxtree.box_level_dtype, + shape="nboxes"), + lp.GlobalArg("q_nodes", self.boxtree.coord_dtype, + shape=lp.auto), + lp.ValueArg("n_active_boxes", self.boxtree.size_t), + lp.ValueArg("n_q_nodes", self.boxtree.size_t), + lp.ValueArg("nboxes", self.boxtree.size_t), + ], + name="get_active_box_quad_points") + + knl = lp.split_iname(knl, + "iabox", 128, outer_tag="g.0", inner_tag="l.0") + + return knl + + evt, result = get_knl(self.boxtree.dimensions)(queue, + active_boxes=self.boxtree.active_boxes, + box_levels=self.boxtree.box_levels, + q_weights=q_weights, + n_active_boxes=self.boxtree.n_active_boxes, + n_q_nodes=len(q_weights), + nboxes=self.boxtree.nboxes) + + assert len(result) == 1 + # TODO: If the ordering is not the same as dealii, reverse the order of quad vars + return result[0].ravel() + pass def get_q_weights(self): """Return quadrature weights. """ - pass + q_weights = cl.array.to_device(queue, self.quadrature_formula.weights) + + def get_knl(dim): + quad_vars = ["iq%d" % i for i in range(dim)] + knl = lp.make_kernel( + "{[iabox, QUAD_VARS]: 0<=iabox box_id = active_boxes[iabox] + <> box_level = box_levels[box_id] + <> box_relative_measure = root_measure / ( + 2 ** ((box_level+1) * DIM)) + result[QUAD_VARS, iabox] = QUAD_WEIGHT * box_relative_measure + """.replace("DIM", str(dim)) + .replace("QUAD_VARS", ", ".join(quad_vars)) + .replace("QUAD_WEIGHT", " * ".join( + ["q_weights[" + qvar + "]" for qvar in quad_vars])) + , + [ + lp.GlobalArg("result", self.boxtree.coord_dtype, + shape=lp.auto), + lp.GlobalArg("active_boxes", self.boxtree.box_id_dtype, + shape=lp.auto), + lp.GlobalArg("box_levels", self.boxtree.box_level_dtype, + shape="nboxes"), + lp.GlobalArg("q_weights", self.boxtree.coord_dtype, + shape=lp.auto), + lp.ValueArg("n_active_boxes", self.boxtree.size_t), + lp.ValueArg("n_q_nodes", self.boxtree.size_t), + lp.ValueArg("nboxes", self.boxtree.size_t), + ], + name="get_active_box_quad_weights") + + knl = lp.split_iname(knl, + "iabox", 128, outer_tag="g.0", inner_tag="l.0") + + return knl + + evt, result = get_knl(self.boxtree.dimensions)(queue, + active_boxes=self.boxtree.active_boxes, + box_levels=self.boxtree.box_levels, + q_weights=q_weights, + n_active_boxes=self.boxtree.n_active_boxes, + n_q_nodes=len(q_weights), + nboxes=self.boxtree.nboxes) + + assert len(result) == 1 + # TODO: If the ordering is not the same as dealii, reverse the order of quad vars + return result[0].ravel() def get_cell_centers(self, queue): """Return centers of active boxes. -- GitLab From 93e3f85832c43318c8e6b2708cac07c95c8609be Mon Sep 17 00:00:00 2001 From: xywei Date: Sun, 24 Jun 2018 20:08:30 +0800 Subject: [PATCH 12/16] Sync --- boxtree/tree_interactive_build.py | 49 ++++++++++++++++++++-------- examples/interactive_boxtree_demo.py | 18 +++++++--- 2 files changed, 50 insertions(+), 17 deletions(-) diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py index 8775124..1c45487 100644 --- a/boxtree/tree_interactive_build.py +++ b/boxtree/tree_interactive_build.py @@ -332,16 +332,29 @@ class QuadratureOnBoxTree(object): else: self.quadrature_formula = quadrature_formula - def get_q_points(self): + def get_q_points(self, queue): """Return quadrature points. """ q_nodes = cl.array.to_device(queue, self.quadrature_formula.nodes) + def tensor_q_node_code(dim): + if dim == 1: + return "q_nodes[iq0]" + elif dim == 2: + return "if(iaxis==0, q_nodes[iq0], q_nodes[iq1])" + elif dim == 3: + return """if(iaxis==0, q_nodes[iq0], + if(iaxis==1, q_nodes[iq1], q_nodes[iq2]) + ) + """ + def get_knl(dim): quad_vars = ["iq%d" % i for i in range(dim)] knl = lp.make_kernel( - "{[iabox, iaxis, QUAD_VARS]: 0<=iabox box_id = active_boxes[iabox] <> box_level = box_levels[box_id] <> box_relative_extent = root_extent / 2 ** (box_level+1) - result[iaxis, QUAD_VARS, iabox] = box_centers[iaxis] + ( - if(iaxis==0, q_nodes[iq0], - if(iaxis==1, q_nodes[iq1], q_nodes[iq2]) - ) * box_relative_extent) + result[iaxis, iabox, QUAD_VARS] = box_centers[iaxis, box_id] + ( + GET_TENSOR_Q_NODE_IAXIS + ) * box_relative_extent """ + .replace("GET_TENSOR_Q_NODE_IAXIS", tensor_q_node_code(dim)) .replace("QUAD_VARS", ", ".join(quad_vars)), [ lp.GlobalArg("result", self.boxtree.coord_dtype, @@ -364,6 +377,9 @@ class QuadratureOnBoxTree(object): shape="nboxes"), lp.GlobalArg("q_nodes", self.boxtree.coord_dtype, shape=lp.auto), + lp.GlobalArg("box_centers", self.boxtree.coord_dtype, + shape="(DIM, nboxes)".replace("DIM", str(dim))), + lp.ValueArg("root_extent", self.boxtree.coord_dtype), lp.ValueArg("n_active_boxes", self.boxtree.size_t), lp.ValueArg("n_q_nodes", self.boxtree.size_t), lp.ValueArg("nboxes", self.boxtree.size_t), @@ -378,18 +394,20 @@ class QuadratureOnBoxTree(object): evt, result = get_knl(self.boxtree.dimensions)(queue, active_boxes=self.boxtree.active_boxes, box_levels=self.boxtree.box_levels, - q_weights=q_weights, + q_nodes=q_nodes, + box_centers=self.boxtree.box_centers, + root_extent=self.boxtree.root_extent, n_active_boxes=self.boxtree.n_active_boxes, - n_q_nodes=len(q_weights), + n_q_nodes=len(q_nodes), nboxes=self.boxtree.nboxes) assert len(result) == 1 # TODO: If the ordering is not the same as dealii, reverse the order of quad vars - return result[0].ravel() + return make_obj_array([cpnt.ravel() for cpnt in result[0]]) pass - def get_q_weights(self): + def get_q_weights(self, queue): """Return quadrature weights. """ q_weights = cl.array.to_device(queue, self.quadrature_formula.weights) @@ -397,7 +415,9 @@ class QuadratureOnBoxTree(object): def get_knl(dim): quad_vars = ["iq%d" % i for i in range(dim)] knl = lp.make_kernel( - "{[iabox, QUAD_VARS]: 0<=iabox Date: Sun, 24 Jun 2018 20:20:48 +0800 Subject: [PATCH 13/16] Quad points in the dicionary order for each box --- examples/interactive_boxtree_demo.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/examples/interactive_boxtree_demo.py b/examples/interactive_boxtree_demo.py index 6b9b859..d2fbfd1 100644 --- a/examples/interactive_boxtree_demo.py +++ b/examples/interactive_boxtree_demo.py @@ -8,11 +8,14 @@ ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) tree = boxtree.tree_interactive_build.BoxTree() -tree.generate_uniform_boxtree(queue, nlevels=3) +tree.generate_uniform_boxtree(queue, nlevels=3, root_extent=2, + # root_vertex=(0,0,0)) + root_vertex=(0,0)) # the default quad formula uses cell centers and cell measures from modepy import GaussLegendreQuadrature -quadrature_formula = GaussLegendreQuadrature(1) +n_q_points = 5 +quadrature_formula = GaussLegendreQuadrature(n_q_points - 1) print(quadrature_formula.nodes, quadrature_formula.weights) quad = boxtree.tree_interactive_build.QuadratureOnBoxTree(tree, quadrature_formula) @@ -21,8 +24,8 @@ cell_measures = quad.get_cell_measures(queue) q_points = quad.get_q_points(queue) q_weights = quad.get_q_weights(queue) -print(q_points) -print(q_weights) +# print(q_points) +# print(q_weights) # print(q_points - cell_centers) # print(q_weights - cell_measures) @@ -34,6 +37,11 @@ plt.draw_tree() plt.set_bounding_box() plt.draw_box_numbers() +qx = q_points[0].get(queue) +qy = q_points[1].get(queue) + +pt.plot(qx, qy, '*') + pt.tick_params( axis='x', # changes apply to the x-axis which='both', # both major and minor ticks are affected -- GitLab From 4f6e5455b6343ef3d10365cd29b9345588f466fd Mon Sep 17 00:00:00 2001 From: xywei Date: Mon, 25 Jun 2018 10:58:49 +0800 Subject: [PATCH 14/16] Bug fixes --- boxtree/tree_interactive_build.py | 46 ++++++++++++++++++---------- examples/interactive_boxtree_demo.py | 2 +- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py index 1c45487..ff0cd7f 100644 --- a/boxtree/tree_interactive_build.py +++ b/boxtree/tree_interactive_build.py @@ -25,6 +25,7 @@ import numpy as np from itertools import product import loopy as lp +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 import pyopencl as cl import pyopencl.array # noqa from pytools.obj_array import make_obj_array @@ -359,12 +360,16 @@ class QuadratureOnBoxTree(object): ["0<=" + qvar + " box_id = active_boxes[iabox] - <> box_level = box_levels[box_id] - <> box_relative_extent = root_extent / 2 ** (box_level+1) - result[iaxis, iabox, QUAD_VARS] = box_centers[iaxis, box_id] + ( - GET_TENSOR_Q_NODE_IAXIS - ) * box_relative_extent + for iabox + <> box_id = active_boxes[iabox] + <> box_level = box_levels[box_id] + <> box_relative_extent = root_extent / 2 ** (box_level+1) + for iaxis, QUAD_VARS + result[iaxis, iabox, QUAD_VARS] = box_centers[ + iaxis, box_id] + ( + GET_TENSOR_Q_NODE_IAXIS * box_relative_extent) + end + end """ .replace("GET_TENSOR_Q_NODE_IAXIS", tensor_q_node_code(dim)) .replace("QUAD_VARS", ", ".join(quad_vars)), @@ -422,11 +427,16 @@ class QuadratureOnBoxTree(object): ["0<=" + qvar + " box_id = active_boxes[iabox] - <> box_level = box_levels[box_id] - <> box_relative_measure = root_measure / ( - 2 ** ((box_level+1) * DIM)) - result[QUAD_VARS, iabox] = QUAD_WEIGHT * box_relative_measure + for iabox + <> box_id = active_boxes[iabox] + <> box_level = box_levels[box_id] + <> box_relative_measure = root_measure / ( + 2 ** ((box_level+1) * DIM)) + for QUAD_VARS + result[QUAD_VARS, iabox] = (QUAD_WEIGHT + * box_relative_measure) + end + end """.replace("DIM", str(dim)) .replace("QUAD_VARS", ", ".join(quad_vars)) .replace("QUAD_WEIGHT", " * ".join( @@ -475,8 +485,10 @@ class QuadratureOnBoxTree(object): "{[iabox, iaxis]: 0<=iabox box_id = active_boxes[iabox] - result[iaxis, iabox] = box_centers[iaxis, box_id] + for iabox + <> box_id = active_boxes[iabox] + result[iaxis, iabox] = box_centers[iaxis, box_id] + end """, [ lp.GlobalArg("result", self.boxtree.coord_dtype, @@ -515,9 +527,11 @@ class QuadratureOnBoxTree(object): knl = lp.make_kernel( "{[iabox]: 0<=iabox box_id = active_boxes[iabox] - <> box_level = box_levels[box_id] - result[iabox] = root_measure / (2 ** (box_level * dims)) + for iabox + <> box_id = active_boxes[iabox] + <> box_level = box_levels[box_id] + result[iabox] = root_measure / (2 ** (box_level * dims)) + end """, [ lp.GlobalArg("result", self.boxtree.coord_dtype, diff --git a/examples/interactive_boxtree_demo.py b/examples/interactive_boxtree_demo.py index d2fbfd1..1a29905 100644 --- a/examples/interactive_boxtree_demo.py +++ b/examples/interactive_boxtree_demo.py @@ -25,7 +25,7 @@ q_points = quad.get_q_points(queue) q_weights = quad.get_q_weights(queue) # print(q_points) -# print(q_weights) +# print(q_weights, np.sum(q_weights.get())) # print(q_points - cell_centers) # print(q_weights - cell_measures) -- GitLab From aaaeb687cfc58c8bcf0853967336f51069117bc1 Mon Sep 17 00:00:00 2001 From: xywei Date: Mon, 25 Jun 2018 16:03:26 +0800 Subject: [PATCH 15/16] Bug fix --- boxtree/tree_interactive_build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py index ff0cd7f..7eca757 100644 --- a/boxtree/tree_interactive_build.py +++ b/boxtree/tree_interactive_build.py @@ -433,7 +433,7 @@ class QuadratureOnBoxTree(object): <> box_relative_measure = root_measure / ( 2 ** ((box_level+1) * DIM)) for QUAD_VARS - result[QUAD_VARS, iabox] = (QUAD_WEIGHT + result[iabox, QUAD_VARS] = (QUAD_WEIGHT * box_relative_measure) end end -- GitLab From 1650321ee55c649a4df4df81099cdfda45860d6a Mon Sep 17 00:00:00 2001 From: xywei Date: Wed, 27 Jun 2018 12:25:31 +0800 Subject: [PATCH 16/16] Add neighborhood info to BoxTree --- boxtree/tree_interactive_build.py | 92 +++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py index 7eca757..30a10fb 100644 --- a/boxtree/tree_interactive_build.py +++ b/boxtree/tree_interactive_build.py @@ -22,6 +22,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ import numpy as np +from cgen import Enum from itertools import product import loopy as lp @@ -31,6 +32,20 @@ import pyopencl.array # noqa from pytools.obj_array import make_obj_array from boxtree.tools import DeviceDataRecord +# {{{ adaptivity_flags + +class adaptivity_flags_enum(Enum): # noqa + """Constants for box adaptivity flags bit field.""" + + c_name = "adaptivity_flags_t" + dtype = np.dtype(np.uint8) + c_value_prefix = "ADAPTIVITY_FLAG_" + + REFINE = 1 << 0 + COARSEN = 1 << 1 + +# }}} + # {{{ Data structure for a tree of boxes class BoxTree(DeviceDataRecord): @@ -128,6 +143,31 @@ class BoxTree(DeviceDataRecord): .. attribute:: active_boxes ``box_id_t [n_active_boxes]`` + + .. ------------------------------------------------------------------------ + .. rubric:: Adaptivity properties + .. ------------------------------------------------------------------------ + + .. attribute:: adaptivity_flags + + :attr:`adaptivity_flags_enum.dtype` ``[n_active_boxes]`` + + A bitwise combination of :class:`adaptivity_flags_enum` constants. + + .. attribute:: n_active_neighbors + + ``size_t [n_active_boxes]`` + + Number of adjacent active boxes for each active box. + + .. attribute:: neighboring_active_boxes + + ``box_id_t [n_active_boxes, n_active_neighbors[n_active_boxes]]`` + + List of adjacent active boxes for each active box. The root box + cannot be in this list of any box, and 0 is used as a placeholder + in the case that the actual number of neighbors is less than + the value held in n_active_neighbors[]. """ def get_copy_kwargs(self, **kwargs): # cl arrays @@ -200,6 +240,7 @@ class BoxTree(DeviceDataRecord): np.array( range(nboxes - n_active_boxes, nboxes), dtype=self.box_id_dtype)) + assert len(self.active_boxes) == 1 << ((nlevels - 1) * dim) self.register_fields({"active_boxes":self.active_boxes}) # FIXME: map bool in pyopencl @@ -251,6 +292,57 @@ class BoxTree(DeviceDataRecord): "box_parent_ids":self.box_parent_ids, "box_child_ids":self.box_child_ids}) + self.adaptivity_flags = cl.array.zeros(queue, + self.n_active_boxes, + adaptivity_flags_enum.dtype) + self.register_fields({"adaptivity_flags": self.adaptivity_flags}) + + n_active_neighbors_host = np.zeros(n_active_boxes, dtype=self.size_t) + nbpatch = [np.array(patch) + for patch in product(range(-1, 2), repeat=dim) + if sum(np.abs(patch)) > 0] + assert len(nbpatch) == 3 ** dim - 1 + maxid = (1 << (nlevels - 1)) - 1 + iabox = 0 + for multiabox in product(range(1 << (nlevels - 1)), repeat=dim): + if ( + min(multiabox) > 0 and + max(multiabox) < maxid + ): + n_active_neighbors_host[iabox] = 3 ** dim - 1 + else: + n_active_neighbors_host[iabox] = sum(1 for patch in nbpatch + if ( + np.min(np.array(multiabox) + patch) >= 0 and + np.max(np.array(multiabox) + patch) <= maxid + )) + iabox += 1 + self.n_active_neighbors = cl.array.to_device(queue, + n_active_neighbors_host) + self.register_fields({"n_active_neighbors": self.n_active_neighbors}) + + ind_bases = np.array([1 << (nlevels - 1) * (dim - d - 1) + for d in range(dim)]) + anb_pre = [] + iabox = 0 + for multiabox in product(range(1 << (nlevels - 1)), repeat=dim): + nbid = 0 + anb_pre.append(np.zeros( + n_active_neighbors_host[iabox], dtype=self.box_id_dtype)) + for patch in nbpatch: + tmp = np.array(multiabox) + patch + if np.min(tmp) >= 0 and np.max(tmp) <= maxid: + print(patch, tmp) + anb_pre[-1][nbid] = self.box_id_dtype( + nboxes - n_active_boxes + + np.sum(tmp * ind_bases)) + nbid += 1 + iabox += 1 + self.active_neighboring_boxes = make_obj_array([ + cl.array.to_device(queue, anb) for anb in anb_pre]) + self.register_fields({ + "active_neighboring_boxes": self.active_neighboring_boxes}) + @property def dimensions(self): return len(self.box_centers) -- GitLab