diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py
index 14941094a3ce02b4687426f48d8c4d25f6c459c8..f84c9ed818981df9fe36b94844a3d473d0697e9a 100644
--- a/boxtree/tree_build.py
+++ b/boxtree/tree_build.py
@@ -71,9 +71,10 @@ class TreeBuilder(object):

     # {{{ run control

-    def __call__(self, queue, particles, max_particles_in_box,
+    def __call__(self, queue, particles, max_particles_in_box=None,
             allocator=None, debug=False, targets=None,
             source_radii=None, target_radii=None, stick_out_factor=0.25,
+            refine_weights=None, max_leaf_refine_weight=None,
             wait_for=None, non_adaptive=False,
             **kwargs):
         """
@@ -90,6 +91,16 @@ class TreeBuilder(object):
         :arg target_radii: Like *source_radii*, but for targets.
         :arg stick_out_factor: See :attr:`Tree.stick_out_factor` and
             :ref:`extent`.
+        :arg refine_weights: If not *None*, a :class:`pyopencl.array.Array` of the
+            type :class:`numpy.int32`. A box will be split if it has a cumulative
+            refine_weight greater than *max_leaf_refine_weight*. If this is given,
+            *max_leaf_refine_weight* must also be given and *max_particles_in_box*
+            must be *None*.
+        :arg max_leaf_refine_weight: If not *None*, specifies the maximum weight
+            of a leaf box.
+        :arg max_particles_in_box: If not *None*, specifies the maximum number
+            of particles in a leaf box. If this is given, both
+            *refine_weights* and *max_leaf_refine_weight* must be *None*.
         :arg wait_for: may either be *None* or a list of :class:`pyopencl.Event`
             instances for whose completion this command waits before starting
             execution.
@@ -239,6 +250,50 @@ class TreeBuilder(object):

         # }}}

+        # {{{ process refine_weights
+
+        from boxtree.tree_build_kernels import refine_weight_dtype
+
+        specified_max_particles_in_box = max_particles_in_box is not None
+        specified_refine_weights = refine_weights is not None and \
+            max_leaf_refine_weight is not None
+
+        if specified_max_particles_in_box and specified_refine_weights:
+            raise ValueError("may only specify one of max_particles_in_box and "
+                    "refine_weights/max_leaf_refine_weight")
+        elif not specified_max_particles_in_box and not specified_refine_weights:
+            raise ValueError("must specify either max_particles_in_box or "
+                    "refine_weights/max_leaf_refine_weight")
+        elif specified_max_particles_in_box:
+            refine_weights = (
+                cl.array.empty(
+                    queue, nsrcntgts, refine_weight_dtype, allocator=allocator)
+                .fill(1))
+            event, = refine_weights.events
+            prep_events.append(event)
+            max_leaf_refine_weight = max_particles_in_box
+        elif specified_refine_weights:
+            if refine_weights.dtype != refine_weight_dtype:
+                raise TypeError("refine_weights must have dtype '%s'"
+                        % refine_weight_dtype)
+
+        if max_leaf_refine_weight < cl.array.max(refine_weights).get():
+            raise ValueError(
+                    "entries of refine_weights cannot exceed max_leaf_refine_weight")
+        if 0 > cl.array.min(refine_weights).get():
+            raise ValueError("all entries of refine_weights must be nonnegative")
+        if max_leaf_refine_weight <= 0:
+            raise ValueError("max_leaf_refine_weight must be positive")
+
+        total_refine_weight = cl.array.sum(
+                refine_weights, dtype=np.dtype(np.int64)).get()
+
+        del max_particles_in_box
+        del specified_max_particles_in_box
+        del specified_refine_weights
+
+        # }}}
+
         # {{{ find and process bounding box

         bbox, _ = self.bbox_finder(srcntgts, srcntgt_radii, wait_for=wait_for)
@@ -297,7 +352,11 @@ class TreeBuilder(object):
         # to test the reallocation code.
         nboxes_guess = kwargs.get("nboxes_guess")
         if nboxes_guess is None:
-            nboxes_guess = div_ceil(nsrcntgts, max_particles_in_box) * 2**dimensions
+            nboxes_guess = 2**dimensions * (
+                    (max_leaf_refine_weight + total_refine_weight - 1)
+                    // max_leaf_refine_weight)
+
+        assert nboxes_guess > 0

         # per-box morton bin counts
         box_morton_bin_counts = empty(nboxes_guess,
@@ -325,9 +384,13 @@ class TreeBuilder(object):
         prep_events.append(evt)

         # Initalize box 0 to contain all particles
-        evt = box_srcntgt_counts_cumul[0].fill(
+        box_srcntgt_counts_cumul[0].fill(
                 nsrcntgts, queue=queue, wait_for=[evt])

+        # box -> whether the box has a child
+        box_has_children, evt = zeros(nboxes_guess, dtype=np.dtype(np.int32))
+        prep_events.append(evt)
+
         # set parent of root box to itself
         evt = cl.enqueue_copy(
                 queue, box_parent_ids.data, np.zeros((), dtype=box_parent_ids.dtype))
@@ -355,7 +418,7 @@ class TreeBuilder(object):
         from time import time
         start_time = time()

-        if nsrcntgts > max_particles_in_box:
+        if total_refine_weight > max_leaf_refine_weight:
            level = 1
        else:
            level = 0
@@ -383,10 +446,12 @@ class TreeBuilder(object):
            common_args = ((morton_bin_counts, morton_nrs,
                    box_start_flags, srcntgt_box_ids, split_box_ids,
                    box_morton_bin_counts,
+                    refine_weights,
+                    max_leaf_refine_weight,
                    box_srcntgt_starts, box_srcntgt_counts_cumul,
                    box_parent_ids, box_morton_nrs,
                    nboxes_dev,
-                    level, max_particles_in_box, bbox,
+                    level, bbox,
                    user_srcntgt_ids)
                    + tuple(srcntgts)
                    + ((srcntgt_radii,) if srcntgts_have_extent else ())
@@ -407,8 +472,9 @@ class TreeBuilder(object):
                    srcntgt_box_ids,
                    box_srcntgt_starts,
                    box_srcntgt_counts_cumul,
-                    max_particles_in_box,
                    box_morton_bin_counts,
+                    refine_weights,
+                    max_leaf_refine_weight,
                    box_levels,
                    level,

@@ -416,6 +482,7 @@ class TreeBuilder(object):
                    nboxes_dev,

                    # output:
+                    box_has_children,
                    split_box_ids,
                    queue=queue, size=nsrcntgts, wait_for=wait_for)
            wait_for = [evt]
@@ -456,6 +523,8 @@ class TreeBuilder(object):
                box_srcntgt_counts_cumul, evt = \
                        my_realloc_zeros(box_srcntgt_counts_cumul)
                resize_events.append(evt)
+                box_has_children, evt = my_realloc_zeros(box_has_children)
+                resize_events.append(evt)

                del my_realloc
                del my_realloc_zeros
@@ -502,8 +571,8 @@ class TreeBuilder(object):
            split_and_sort_args = (
                    common_args
                    + (new_user_srcntgt_ids, have_oversize_split_box,
-                        new_srcntgt_box_ids, box_levels))
-
+                        new_srcntgt_box_ids, box_levels,
+                        box_has_children))
            fin_debug("split and sort")

            evt = knl_info.split_and_sort_kernel(*split_and_sort_args,
@@ -628,6 +697,9 @@ class TreeBuilder(object):
                    box_srcntgt_counts_nonchild)
            prune_events.append(evt)

+            box_has_children, evt = prune_empty(box_has_children)
+            prune_events.append(evt)
+
            # Remap level_start_box_nrs to new box IDs.
            # FIXME: It would be better to do this on the device.
            level_start_box_nrs = list(
@@ -875,8 +947,7 @@ class TreeBuilder(object):
                box_srcntgt_counts_cumul,
                box_source_counts_cumul,
                box_target_counts_cumul,
-                max_particles_in_box,
-                box_levels, nlevels,
+                box_has_children, box_levels, nlevels,

                # output if srcntgts_have_extent, input+output otherwise
                box_source_counts_nonchild, box_target_counts_nonchild,
@@ -889,6 +960,8 @@ class TreeBuilder(object):

         # }}}

+        del box_has_children
+
         # {{{ build output

         extra_tree_attrs = {}
diff --git a/boxtree/tree_build_kernels.py b/boxtree/tree_build_kernels.py
index da844884b8da1386286876420c96bf4449f05f02..b24c3b7ce3371639c8afd76c604c13fdeb5ac0ec 100644
--- a/boxtree/tree_build_kernels.py
+++ b/boxtree/tree_build_kernels.py
@@ -73,21 +73,34 @@ logger = logging.getLogger(__name__)
 # HOW DOES THE PRIMARY SCAN WORK?
 # -------------------------------
 #
-# This code sorts particles into an nD-tree of boxes. It does this by doing a
-# (parallel) scan over particles and a (local, i.e. independent for each particle)
-# postprocessing step for each level.
+# This code sorts particles into an nD-tree of boxes. It does this by doing two
+# successive (parallel) scans over particles and a (local, i.e. independent for
+# each particle) postprocessing step for each level.
 #
-# The following information is being pushed around by the scan, which
-# proceeds over particles:
+# The following information is being pushed around by the scans, which
+# proceed over particles:
 #
-# - a cumulative count ("counts") of particles in each subbox ("morton_nr") at
-#   the current level, should the current box need to be subdivided.
+# - a cumulative count ("pcnt") and weight ("pwt") of particles in each subbox
+#   ("morton_nr") at the current level, should the current box need to be
+#   subdivided.
 #
-# - the "split_box_id". The very first entry here gets intialized to
-#   the number of boxes present at the previous level. If a box knows it needs to
-#   be subdivided, its first particle asks for 2**d new boxes. This gets scanned
-#   over by summing globally (unsegmented-ly). The splits are then realized in
-#   the post-processing step.
+# - the "split_box_id". This is the box number that the particle gets pushed
+#   into. The very first entry here gets initialized to the number of boxes
+#   present at the previous level.
+#
+# Using this data, the stages of the algorithm proceed as follows:
+#
+# 1. Count the number of particles in each subbox. This stage uses a segmented
+#    (per-box) scan to fill "pcnt" and "pwt".
+#
+# 2. Using a global (non-segmented) scan over the particles, make a decision
+#    whether to refine each box, and compute the total number of new boxes
+#    needed. This stage also computes the split_box_id for each particle. If a
+#    box knows it needs to be subdivided, its first particle asks for 2**d new
+#    boxes.
+#
+# 3. Realize the splitting determined in #2. This stage proceeds in an
+#    element-wise fashion over the boxes at the current level.
 #
 # -----------------------------------------------------------------------------
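Aside for orientation (not part of the patch): the three stages described in the rewritten comment above can be mimicked on the host with NumPy. This is an illustrative, serial stand-in under simplifying assumptions -- it ignores srcntgt extents/non-child particles and the scan-based bookkeeping, and every name in it is invented for exposition rather than taken from boxtree:

    import numpy as np

    def refine_level(box_ids, morton_nrs, refine_weights,
                     max_leaf_refine_weight, nboxes, dimensions):
        # One refinement pass over a single level.
        nsub = 2**dimensions

        # Stage 1: per-box, per-subbox counts ("pcnt") and weights ("pwt").
        # (pcnt feeds box starts/counts in the real code; unused in this toy.)
        pcnt = np.zeros((nboxes, nsub), dtype=np.intp)
        pwt = np.zeros((nboxes, nsub), dtype=np.int64)
        np.add.at(pcnt, (box_ids, morton_nrs), 1)
        np.add.at(pwt, (box_ids, morton_nrs), refine_weights)

        # Stage 2: decide which boxes split; each splitting box claims 2**d
        # new, contiguously numbered box ids.
        needs_split = pwt.sum(axis=1) > max_leaf_refine_weight
        first_child = np.full(nboxes, -1, dtype=np.intp)
        first_child[needs_split] = nboxes + nsub*np.arange(needs_split.sum())

        # Stage 3: push each particle of a split box into the child box given
        # by its Morton subbox number.
        new_box_ids = box_ids.copy()
        moved = needs_split[box_ids]
        new_box_ids[moved] = first_child[box_ids[moved]] + morton_nrs[moved]
        return new_box_ids, nboxes + nsub*int(needs_split.sum())

In the device code, stage 2 is the single global (non-segmented) scan described above; the arange() call here merely stands in for that prefix sum over requested boxes.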
@@ -98,6 +111,9 @@ class _KernelInfo(Record):

 # {{{ data types

+refine_weight_dtype = np.dtype(np.int32)
+
+
 @memoize
 def make_morton_bin_count_type(device, dimensions, particle_id_dtype,
         srcntgts_have_extent):
@@ -110,6 +126,9 @@ def make_morton_bin_count_type(device, dimensions, particle_id_dtype,
     from boxtree.tools import padded_bin
     for mnr in range(2**dimensions):
         fields.append(("pcnt%s" % padded_bin(mnr, dimensions), particle_id_dtype))
+    # Morton bin weight totals
+    for mnr in range(2**dimensions):
+        fields.append(("pwt%s" % padded_bin(mnr, dimensions), refine_weight_dtype))

     dtype = np.dtype(fields)

@@ -135,6 +154,7 @@ def make_morton_bin_count_type(device, dimensions, particle_id_dtype,
 TYPE_DECL_PREAMBLE_TPL = Template(r"""//CL//
     typedef ${dtype_to_ctype(morton_bin_count_dtype)} morton_counts_t;
     typedef morton_counts_t scan_t;
+    typedef ${dtype_to_ctype(refine_weight_dtype)} refine_weight_t;
    typedef ${dtype_to_ctype(bbox_dtype)} bbox_t;
    typedef ${dtype_to_ctype(coord_dtype)} coord_t;
    typedef ${dtype_to_ctype(coord_vec_dtype)} coord_vec_t;
@@ -189,17 +209,28 @@ SCAN_PREAMBLE_TPL = Template(r"""//CL//
    scan_t scan_t_neutral()
    {
        scan_t result;
+
        %if srcntgts_have_extent:
        result.nonchild_srcntgts = 0;
        %endif
+
        %for mnr in range(2**dimensions):
        result.pcnt${padded_bin(mnr, dimensions)} = 0;
        %endfor
+        %for mnr in range(2**dimensions):
+        result.pwt${padded_bin(mnr, dimensions)} = 0;
+        %endfor
        return result;
    }

    // }}}

+    inline int my_add_sat(int a, int b)
+    {
+        long result = (long) a + b;
+        return (result > INT_MAX) ? INT_MAX : result;
+    }
+
    // {{{ scan 'add' operation

    scan_t scan_t_add(scan_t a, scan_t b, bool across_seg_boundary)
    {
@@ -213,6 +244,16 @@ SCAN_PREAMBLE_TPL = Template(r"""//CL//
            <% field = "pcnt"+padded_bin(mnr, dimensions) %>
            b.${field} = a.${field} + b.${field};
            %endfor
+            %for mnr in range(2**dimensions):
+            <% field = "pwt"+padded_bin(mnr, dimensions) %>
+            // XXX: The use of add_sat() seems to be causing trouble
+            //      with multiple compilers. For d=3:
+            //      1. POCL will miscompile and either give wrong
+            //         results or crash.
+            //      2. Intel will use a large amount of memory.
+            //      Versions tested: POCL 0.13, Intel OpenCL 16.1
+            b.${field} = my_add_sat(a.${field}, b.${field});
+            %endfor
        }

        return b;
@@ -227,7 +268,8 @@ SCAN_PREAMBLE_TPL = Template(r"""//CL//
        const int level,
        bbox_t const *bbox,
        global morton_nr_t *morton_nrs, // output/side effect
-        global particle_id_t *user_srcntgt_ids
+        global particle_id_t *user_srcntgt_ids,
+        global refine_weight_t *refine_weights
        %for ax in axis_names:
            , global const coord_t *${ax}
        %endfor
@@ -325,6 +367,11 @@ SCAN_PREAMBLE_TPL = Template(r"""//CL//
        <% field = "pcnt"+padded_bin(mnr, dimensions) %>
        result.${field} = (level_morton_number == ${mnr});
        %endfor
+        %for mnr in range(2**dimensions):
+        <% field = "pwt"+padded_bin(mnr, dimensions) %>
+        result.${field} = (level_morton_number == ${mnr}) ?
+            refine_weights[user_srcntgt_id] : 0;
+        %endfor
        morton_nrs[i] = level_morton_number;

        return result;
@@ -377,8 +424,9 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate(
        box_id_t *srcntgt_box_ids,
        particle_id_t *box_srcntgt_starts,
        particle_id_t *box_srcntgt_counts_cumul,
-        particle_id_t max_particles_in_box,
        morton_counts_t *box_morton_bin_counts,
+        refine_weight_t *refine_weights,
+        refine_weight_t max_leaf_refine_weight,
        box_level_t *box_levels,
        box_level_t level,

@@ -386,18 +434,20 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate(
        box_id_t *nboxes,

        /* output */
+        int *box_has_children,
        box_id_t *split_box_ids,
        """,
    preamble=r"""//CL:mako//
        scan_t count_new_boxes_needed(
            particle_id_t i,
            box_id_t box_id,
+            refine_weight_t max_leaf_refine_weight,
            __global box_id_t *nboxes,
            __global particle_id_t *box_srcntgt_starts,
            __global particle_id_t *box_srcntgt_counts_cumul,
            __global morton_counts_t *box_morton_bin_counts,
-            particle_id_t max_particles_in_box,
            __global box_level_t *box_levels,
+            __global int *box_has_children, // output/side effect
            box_level_t level
            )
        {
@@ -407,6 +457,9 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate(
            if (i == 0)
                result += *nboxes;

+            particle_id_t first_particle_in_my_box =
+                box_srcntgt_starts[box_id];
+
            %if srcntgts_have_extent:
            const particle_id_t nonchild_srcntgts_in_box =
                box_morton_bin_counts[box_id].nonchild_srcntgts;
@@ -414,8 +467,12 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate(
            const particle_id_t nonchild_srcntgts_in_box = 0;
            %endif

-            particle_id_t first_particle_in_my_box =
-                box_srcntgt_starts[box_id];
+            // Get box refine weight.
+            refine_weight_t box_refine_weight = 0;
+            %for mnr in range(2**dimensions):
+            box_refine_weight = add_sat(box_refine_weight,
+                box_morton_bin_counts[box_id].pwt${padded_bin(mnr, dimensions)});
+            %endfor

            // Add 2**d to make enough room for a split of the current box
            // This will be the split_box_id for *all* particles in this box,
@@ -427,32 +484,34 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate(
                    // If srcntgts have extent, then prior-level boxes
                    // will keep asking for more boxes to be allocated.
                    // Prevent that.
-
                    && box_levels[box_id] + 1 == level
                %endif
                &&
                %if adaptive:
                    /* box overfull? */
-                    box_srcntgt_counts_cumul[box_id] - nonchild_srcntgts_in_box
-                        > max_particles_in_box
+                    box_refine_weight
+                        > max_leaf_refine_weight
                %else:
                    /* box non-empty? */
+                    /* Note: Refine weights are allowed to be 0,
+                       so check # of particles directly. */
                    box_srcntgt_counts_cumul[box_id] - nonchild_srcntgts_in_box
                        > 0
                %endif
                )
            {
                result += ${2**dimensions};
+                box_has_children[box_id] = 1;
            }

            return result;
        }
        """,
    input_expr="""count_new_boxes_needed(
-        i, srcntgt_box_ids[i], nboxes,
+        i, srcntgt_box_ids[i], max_leaf_refine_weight, nboxes,
        box_srcntgt_starts, box_srcntgt_counts_cumul, box_morton_bin_counts,
-        max_particles_in_box, box_levels, level
+        box_levels, box_has_children, level
        )""",
    scan_expr="a + b", neutral="0",
@@ -505,26 +564,7 @@ SPLIT_AND_SORT_KERNEL_TPL = Template(r"""//CL//
        dbg_printf(("postproc %d:\n", i));
        dbg_printf((" my box id: %d\n", ibox));

-        particle_id_t box_srcntgt_count = box_srcntgt_counts_cumul[ibox];
-
-        %if srcntgts_have_extent:
-            const particle_id_t nonchild_srcntgt_count =
-                box_morton_bin_counts[ibox].nonchild_srcntgts;
-
-        %else:
-            const particle_id_t nonchild_srcntgt_count = 0;
-        %endif
-
-        %if adaptive:
-            bool do_split_box =
-                box_srcntgt_count - nonchild_srcntgt_count
-                > max_particles_in_box;
-        %else:
-            bool do_split_box =
-                box_srcntgt_count - nonchild_srcntgt_count
-                > 0;
-        %endif
-
+        bool do_split_box = box_has_children[ibox];
        %if srcntgts_have_extent:
            ## Only do split-box processing for srcntgts that were touched
            ## on the immediately preceding level.
@@ -627,9 +667,11 @@ SPLIT_AND_SORT_KERNEL_TPL = Template(r"""//CL//
                        box_srcntgt_counts_cumul[new_box_id] = new_count;
                        box_levels[new_box_id] = level;

-                        // For a non-adaptive run, max_particles_in_box drives the
-                        // level loop.
-                        if (new_count > max_particles_in_box)
+                        refine_weight_t new_weight =
+                            my_box_morton_bin_counts.pwt${padded_bin(mnr, dimensions)};
+
+                        // This drives the level loop.
+                        if (new_weight > max_leaf_refine_weight)
                        {
                            *have_oversize_split_box = 1;
                        }
@@ -882,7 +924,7 @@ BOX_INFO_KERNEL_TPL = ElementwiseTemplate(
        particle_id_t *box_srcntgt_counts_cumul,
        particle_id_t *box_source_counts_cumul,
        particle_id_t *box_target_counts_cumul,
-        particle_id_t max_particles_in_box,
+        int *box_has_children,
        box_level_t *box_levels,
        box_level_t nlevels,
@@ -952,24 +994,10 @@ BOX_INFO_KERNEL_TPL = ElementwiseTemplate(

            PYOPENCL_ELWISE_CONTINUE;
        }
-        else if (
-            %if adaptive:
-                particle_count - nonchild_srcntgt_count > max_particles_in_box
-            %else:
-                particle_count - nonchild_srcntgt_count > 0
-            %endif
-            && box_levels[box_id] + 1 < nlevels)
+        else if (box_has_children[box_id])
        {
            // This box has children, it is not a leaf.

-            // That second condition there covers a weird corner case. It's
-            // obviously true--a last-level box won't have children. But why
-            // is it necessary? It turns out that nonchild_srcntgt_count is not
-            // available (i.e. zero) for boxes on the last level. So these boxes
-            // look like they got split if they have enough non-child srcntgts,
-            // to the first part of the 'if' condition. But in fact they weren't,
-            // because of their non-child srcntgts.
-
            my_box_flags |= BOX_HAS_CHILDREN;

            %if sources_are_targets:
@@ -1105,6 +1133,7 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype,
            coord_dtype=coord_dtype,
            coord_vec_dtype=coord_vec_dtype,
            bbox_dtype=bbox_dtype,
+            refine_weight_dtype=refine_weight_dtype,
            particle_id_dtype=particle_id_dtype,
            morton_bin_count_dtype=morton_bin_count_dtype,
            morton_nr_dtype=morton_nr_dtype,
@@ -1169,6 +1198,11 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype,
            VectorArg(morton_bin_count_dtype, "box_morton_bin_counts"),
            # [nsrcntgts]

+            VectorArg(refine_weight_dtype, "refine_weights"),
+            # [nsrcntgts]
+
+            ScalarArg(refine_weight_dtype, "max_leaf_refine_weight"),
+
            # particle# at which each box starts
            VectorArg(particle_id_dtype, "box_srcntgt_starts"),
            # [nboxes]
@@ -1186,7 +1220,6 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype,
            VectorArg(box_id_dtype, "nboxes"),
            # [1]
            ScalarArg(np.int32, "level"),
-            ScalarArg(particle_id_dtype, "max_particles_in_box"),
            ScalarArg(bbox_dtype, "bbox"),
            VectorArg(particle_id_dtype, "user_srcntgt_ids"),
            # [nsrcntgts]
@@ -1208,6 +1241,7 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype,
                % ", ".join([
                    "i", "level", "&bbox", "morton_nrs",
                    "user_srcntgt_ids",
+                    "refine_weights",
                    ]
                    + ["%s" % ax for ax in axis_names]
                    + (["srcntgt_radii"] if srcntgts_have_extent else []))),
@@ -1231,11 +1265,13 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype,
                ("box_id_t", box_id_dtype),
                ("morton_counts_t", morton_bin_count_dtype),
                ("box_level_t", box_level_dtype),
+                ("refine_weight_t", refine_weight_dtype),
                ),
            var_values=(
                ("dimensions", dimensions),
                ("srcntgts_have_extent", srcntgts_have_extent),
                ("adaptive", adaptive),
+                ("padded_bin", padded_bin),
                ),
            more_preamble=generic_preamble)

@@ -1264,6 +1300,7 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype,
            VectorArg(np.int32, "have_oversize_split_box", with_offset=True),
            VectorArg(box_id_dtype, "new_srcntgt_box_ids", with_offset=True),
            VectorArg(box_level_dtype, "box_levels", with_offset=True),
+            VectorArg(np.int32, "box_has_children", with_offset=True),
            ],
            str(split_and_sort_kernel_source), name="split_and_sort",
            preamble=(
diff --git a/test/test_tree.py b/test/test_tree.py
index 69fead4a9f65f7d1d57cd167a356bb77742dfd27..8ac66d42ba43936ca9856970035397b5efba0c29 100644
--- a/test/test_tree.py
+++ b/test/test_tree.py
@@ -84,7 +84,8 @@ def test_bounding_box(ctx_getter, dtype, dims, nparticles):
 # {{{ test basic (no source/target distinction) tree build

 def run_build_test(builder, queue, dims, dtype, nparticles, do_plot,
-        max_particles_in_box=30, **kwargs):
+        max_particles_in_box=None, max_leaf_refine_weight=None,
+        refine_weights=None, **kwargs):
     dtype = np.dtype(dtype)

     if dtype == np.float32:
@@ -101,9 +102,14 @@ def run_build_test(builder, queue, dims, dtype, nparticles, do_plot,
        pytest.xfail("2D float doesn't work on POCL")

    logger.info(75*"-")
-    logger.info("%dD %s - %d particles - max %d per box - %s" % (
+    if max_particles_in_box is not None:
+        logger.info("%dD %s - %d particles - max %d per box - %s" % (
            dims, dtype.type.__name__, nparticles, max_particles_in_box,
            " - ".join("%s: %s" % (k, v) for k, v in six.iteritems(kwargs))))
+    else:
+        logger.info("%dD %s - %d particles - max leaf weight %d - %s" % (
+            dims, dtype.type.__name__, nparticles, max_leaf_refine_weight,
+            " - ".join("%s: %s" % (k, v) for k, v in six.iteritems(kwargs))))
    logger.info(75*"-")

    particles = make_normal_particle_array(queue, nparticles, dims, dtype)
@@ -115,8 +121,10 @@ def run_build_test(builder, queue, dims, dtype, nparticles, do_plot,
    queue.finish()

    tree, _ = builder(queue, particles,
-            max_particles_in_box=max_particles_in_box, debug=True,
-            **kwargs)
+            max_particles_in_box=max_particles_in_box,
+            refine_weights=refine_weights,
+            max_leaf_refine_weight=max_leaf_refine_weight,
+            debug=True, **kwargs)
    tree = tree.get(queue=queue)

    sorted_particles = np.array(list(tree.sources))
@@ -125,6 +133,9 @@ def run_build_test(builder, queue, dims, dtype, nparticles, do_plot,
    assert (sorted_particles
            == unsorted_particles[:, tree.user_source_ids]).all()

+    if refine_weights is not None:
+        refine_weights_reordered = refine_weights.get()[tree.user_source_ids]
+
    all_good_so_far = True

    if do_plot:
@@ -137,7 +148,6 @@ def run_build_test(builder, queue, dims, dtype, nparticles, do_plot,
    scaled_tol = tol*tree.root_extent
    for ibox in range(tree.nboxes):
-
        # Empty boxes exist in non-pruned trees--which themselves are undocumented.
        # These boxes will fail these tests.
        if not (tree.box_flags[ibox] & bfe.HAS_OWN_SRCNTGTS):
@@ -178,6 +188,23 @@ def run_build_test(builder, queue, dims, dtype, nparticles, do_plot,
        if not all_good_here:
            print("BAD BOX", ibox)

+        if not (tree.box_flags[ibox] & bfe.HAS_CHILDREN):
+            # Check that leaf particle density is as promised.
+            nparticles_in_box = tree.box_source_counts_cumul[ibox]
+            if max_particles_in_box is not None:
+                if nparticles_in_box > max_particles_in_box:
+                    print("too many particles ({0} > {1}); box {2}".format(
+                        nparticles_in_box, max_particles_in_box, ibox))
+                    all_good_here = False
+            else:
+                assert refine_weights is not None
+                box_weight = np.sum(
+                    refine_weights_reordered[start:start+nparticles_in_box])
+                if box_weight > max_leaf_refine_weight:
+                    print("refine weight exceeded ({0} > {1}); box {2}".format(
+                        box_weight, max_leaf_refine_weight, ibox))
+                    all_good_here = False
+
        all_good_so_far = all_good_so_far and all_good_here

    if do_plot:
@@ -192,10 +219,6 @@ def particle_tree_test_decorator(f):
    f = pytest.mark.parametrize("dtype", [np.float64, np.float32])(f)
    f = pytest.mark.parametrize("dims", [2, 3])(f)

-    def wrapper(*args, **kwargs):
-        logging.basicConfig(level=logging.INFO)
-        f(*args, **kwargs)
-
    return f


@@ -208,7 +231,7 @@ def test_single_box_particle_tree(ctx_getter, dtype, dims, do_plot=False):
    builder = TreeBuilder(ctx)

    run_build_test(builder, queue, dims,
-            dtype, 4, do_plot=do_plot)
+            dtype, 4, max_particles_in_box=30, do_plot=do_plot)


 @particle_tree_test_decorator
@@ -220,7 +243,7 @@ def test_two_level_particle_tree(ctx_getter, dtype, dims, do_plot=False):
    builder = TreeBuilder(ctx)

    run_build_test(builder, queue, dims,
-            dtype, 50, do_plot=do_plot)
+            dtype, 50, max_particles_in_box=30, do_plot=do_plot)


 @particle_tree_test_decorator
@@ -233,7 +256,7 @@ def test_unpruned_particle_tree(ctx_getter, dtype, dims, do_plot=False):

    # test unpruned tree build
    run_build_test(builder, queue, dims, dtype, 10**5,
-            do_plot=do_plot, skip_prune=True)
+            do_plot=do_plot, max_particles_in_box=30, skip_prune=True)


 @particle_tree_test_decorator
@@ -245,7 +268,7 @@ def test_particle_tree_with_reallocations(ctx_getter, dtype, dims, do_plot=False):
    builder = TreeBuilder(ctx)

    run_build_test(builder, queue, dims, dtype, 10**5,
-            do_plot=do_plot, nboxes_guess=5)
+            max_particles_in_box=30, do_plot=do_plot, nboxes_guess=5)


 @particle_tree_test_decorator
@@ -258,7 +281,7 @@ def test_particle_tree_with_many_empty_leaves(
        ctx_getter, dtype, dims, do_plot=False):
    builder = TreeBuilder(ctx)
    run_build_test(builder, queue, dims, dtype, 10**5,
-            do_plot=do_plot, max_particles_in_box=5)
+            max_particles_in_box=5, do_plot=do_plot)


 @particle_tree_test_decorator
@@ -270,6 +293,30 @@ def test_vanilla_particle_tree(ctx_getter, dtype, dims, do_plot=False):
    builder = TreeBuilder(ctx)

    run_build_test(builder, queue, dims, dtype, 10**5,
+            max_particles_in_box=30, do_plot=do_plot)
+
+
+@particle_tree_test_decorator
+def test_explicit_refine_weights_particle_tree(ctx_getter, dtype, dims,
+        do_plot=False):
+    ctx = ctx_getter()
+    queue = cl.CommandQueue(ctx)
+
+    from boxtree import TreeBuilder
+    builder = TreeBuilder(ctx)
+
+    nparticles = 10**5
+
+    from pyopencl.clrandom import PhiloxGenerator
+    import random
+    random.seed(10)
+    rng = PhiloxGenerator(ctx)
+    refine_weights = cl.array.empty(queue, nparticles, np.int32)
+    evt = rng.fill_uniform(refine_weights, a=1, b=10)
+    cl.wait_for_events([evt])
+
+    run_build_test(builder, queue, dims, dtype, nparticles,
+            refine_weights=refine_weights, max_leaf_refine_weight=100,
            do_plot=do_plot)


@@ -282,7 +329,7 @@ def test_non_adaptive_particle_tree(ctx_getter, dtype, dims, do_plot=False):
    builder = TreeBuilder(ctx)

    run_build_test(builder, queue, dims, dtype, 10**4,
-            do_plot=do_plot, non_adaptive=True)
+            max_particles_in_box=30, do_plot=do_plot, non_adaptive=True)

 # }}}
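Usage note (not part of the patch): with this change applied, a caller picks exactly one of the two leaf criteria. The sketch below mirrors the new docstring and test_explicit_refine_weights_particle_tree; the context/queue creation and particle setup are illustrative boilerplate, and make_normal_particle_array is assumed to be the helper the tests already import from boxtree.tools.

    import numpy as np
    import pyopencl as cl
    import pyopencl.array  # noqa: F401  (makes cl.array available)
    from boxtree import TreeBuilder
    from boxtree.tools import make_normal_particle_array

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    builder = TreeBuilder(ctx)

    nparticles = 10**5
    particles = make_normal_particle_array(queue, nparticles, 3, np.float64)

    # Option 1: classic particle-count criterion (refine_weights left as None).
    tree, _ = builder(queue, particles, max_particles_in_box=30)

    # Option 2: explicit per-particle int32 weights plus a per-leaf budget.
    # Passing both criteria at once raises ValueError.
    refine_weights = cl.array.empty(queue, nparticles, np.int32)
    refine_weights.fill(1)
    tree, _ = builder(queue, particles,
            refine_weights=refine_weights, max_leaf_refine_weight=100)

Setting all weights to 1 with max_leaf_refine_weight=N reproduces the max_particles_in_box=N behavior, which is exactly how tree_build.py now implements the old keyword internally.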