diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index a0522a03007676804be0e1371fd2474793168d17..e1d83c5c9c6c19062ab11656e2e0923db7a8cf9e 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -61,20 +61,21 @@ class TreeBuilder(object): def get_kernel_info(self, dimensions, coord_dtype, particle_id_dtype, box_id_dtype, sources_are_targets, srcntgts_have_extent, - stick_out_factor): + stick_out_factor, adaptive): from boxtree.tree_build_kernels import get_tree_build_kernel_info return get_tree_build_kernel_info(self.context, dimensions, coord_dtype, particle_id_dtype, box_id_dtype, sources_are_targets, srcntgts_have_extent, - stick_out_factor, self.morton_nr_dtype, self.box_level_dtype) + stick_out_factor, self.morton_nr_dtype, self.box_level_dtype, + adaptive=adaptive) # {{{ run control def __call__(self, queue, particles, max_particles_in_box, allocator=None, debug=False, targets=None, source_radii=None, target_radii=None, stick_out_factor=0.25, - wait_for=None, + wait_for=None, non_adaptive=False, **kwargs): """ :arg queue: a :class:`pyopencl.CommandQueue` instance @@ -93,6 +94,9 @@ class TreeBuilder(object): :arg wait_for: may either be *None* or a list of :class:`pyopencl.Event` instances for whose completion this command waits before starting exeuction. + :arg non_adaptive: If *True*, return a tree in which all leaf boxes are + on the same (last) level. The tree is pruned, in the sense that empty + boxes have been eliminated. :arg kwargs: Used internally for debugging. 
:returns: a tuple ``(tree, event)``, where *tree* is an instance of @@ -163,7 +167,7 @@ class TreeBuilder(object): knl_info = self.get_kernel_info(dimensions, coord_dtype, particle_id_dtype, box_id_dtype, sources_are_targets, srcntgts_have_extent, - stick_out_factor) + stick_out_factor, adaptive=not non_adaptive) # {{{ combine sources and targets into one array, if necessary diff --git a/boxtree/tree_build_kernels.py b/boxtree/tree_build_kernels.py index 610de4003de027c686a677e50063e140056e5f8a..da844884b8da1386286876420c96bf4449f05f02 100644 --- a/boxtree/tree_build_kernels.py +++ b/boxtree/tree_build_kernels.py @@ -432,9 +432,16 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate( box_levels[box_id] + 1 == level %endif && - /* box overfull? */ - box_srcntgt_counts_cumul[box_id] - nonchild_srcntgts_in_box - > max_particles_in_box) + %if adaptive: + /* box overfull? */ + box_srcntgt_counts_cumul[box_id] - nonchild_srcntgts_in_box + > max_particles_in_box + %else: + /* box non-empty? */ + box_srcntgt_counts_cumul[box_id] - nonchild_srcntgts_in_box + > 0 + %endif + ) { result += ${2**dimensions}; } @@ -508,9 +515,15 @@ SPLIT_AND_SORT_KERNEL_TPL = Template(r"""//CL// const particle_id_t nonchild_srcntgt_count = 0; %endif - bool do_split_box = - box_srcntgt_count - nonchild_srcntgt_count - > max_particles_in_box; + %if adaptive: + bool do_split_box = + box_srcntgt_count - nonchild_srcntgt_count + > max_particles_in_box; + %else: + bool do_split_box = + box_srcntgt_count - nonchild_srcntgt_count + > 0; + %endif %if srcntgts_have_extent: ## Only do split-box processing for srcntgts that were touched @@ -614,6 +627,8 @@ SPLIT_AND_SORT_KERNEL_TPL = Template(r"""//CL// box_srcntgt_counts_cumul[new_box_id] = new_count; box_levels[new_box_id] = level; + // For a non-adaptive run, max_particles_in_box drives the + // level loop. 
if (new_count > max_particles_in_box) { *have_oversize_split_box = 1; @@ -937,7 +952,12 @@ BOX_INFO_KERNEL_TPL = ElementwiseTemplate( PYOPENCL_ELWISE_CONTINUE; } - else if (particle_count - nonchild_srcntgt_count > max_particles_in_box + else if ( + %if adaptive: + particle_count - nonchild_srcntgt_count > max_particles_in_box + %else: + particle_count - nonchild_srcntgt_count > 0 + %endif && box_levels[box_id] + 1 < nlevels) { // This box has children, it is not a leaf. @@ -1045,7 +1065,8 @@ BOX_INFO_KERNEL_TPL = ElementwiseTemplate( def get_tree_build_kernel_info(context, dimensions, coord_dtype, particle_id_dtype, box_id_dtype, sources_are_targets, srcntgts_have_extent, - stick_out_factor, morton_nr_dtype, box_level_dtype): + stick_out_factor, morton_nr_dtype, box_level_dtype, + adaptive): logger.info("start building tree build kernels") @@ -1092,6 +1113,8 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, AXIS_NAMES=AXIS_NAMES, box_flags_enum=box_flags_enum, + adaptive=adaptive, + sources_are_targets=sources_are_targets, srcntgts_have_extent=srcntgts_have_extent, @@ -1180,13 +1203,14 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, morton_count_scan = GenericScanKernel( context, morton_bin_count_dtype, arguments=common_arguments, - input_expr="scan_t_from_particle(%s)" + input_expr=( + "scan_t_from_particle(%s)" % ", ".join([ "i", "level", "&bbox", "morton_nrs", "user_srcntgt_ids", ] + ["%s" % ax for ax in axis_names] - + (["srcntgt_radii"] if srcntgts_have_extent else [])), + + (["srcntgt_radii"] if srcntgts_have_extent else []))), scan_expr="scan_t_add(a, b, across_seg_boundary)", neutral="scan_t_neutral()", is_segment_start_expr="box_start_flags[i]", @@ -1211,6 +1235,7 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, var_values=( ("dimensions", dimensions), ("srcntgts_have_extent", srcntgts_have_extent), + ("adaptive", adaptive), ), more_preamble=generic_preamble) @@ -1381,8 +1406,8 @@ def 
get_tree_build_kernel_info(context, dimensions, coord_dtype, split_box_id_scan=split_box_id_scan, split_and_sort_kernel=split_and_sort_kernel, - extract_nonchild_srcntgt_count_kernel= - extract_nonchild_srcntgt_count_kernel, + extract_nonchild_srcntgt_count_kernel=( + extract_nonchild_srcntgt_count_kernel), find_prune_indices_kernel=find_prune_indices_kernel, srcntgt_permuter=srcntgt_permuter, source_counter=source_counter, diff --git a/test/test_tree.py b/test/test_tree.py index c44482f270ef9acb96ba3933a89a16b71cbac1f9..7e61aa333062b2fb0efdedf3059be0c947623015 100644 --- a/test/test_tree.py +++ b/test/test_tree.py @@ -181,43 +181,113 @@ def run_build_test(builder, queue, dims, dtype, nparticles, do_plot, assert all_good_so_far -@pytest.mark.opencl -@pytest.mark.parametrize("dtype", [np.float64, np.float32]) -@pytest.mark.parametrize("dims", [2, 3]) -def test_particle_tree(ctx_getter, dtype, dims, do_plot=False): - logging.basicConfig(level=logging.INFO) +def particle_tree_test_decorator(f): + """Mark *f* as an OpenCL test over dtype/dims that sets up INFO logging.""" + from functools import wraps + + # wraps() keeps f's signature visible so pytest still injects fixtures. + @wraps(f) + def wrapper(*args, **kwargs): + logging.basicConfig(level=logging.INFO) + return f(*args, **kwargs) + + wrapper = pytest.mark.opencl(wrapper) + wrapper = pytest.mark.parametrize("dtype", [np.float64, np.float32])(wrapper) + wrapper = pytest.mark.parametrize("dims", [2, 3])(wrapper) + return wrapper + + +@particle_tree_test_decorator +def test_single_boxparticle_tree(ctx_getter, dtype, dims, do_plot=False): ctx = ctx_getter() queue = cl.CommandQueue(ctx) from boxtree import TreeBuilder builder = TreeBuilder(ctx) - # test single-box corner case run_build_test(builder, queue, dims, - dtype, 4, do_plot=False) + dtype, 4, do_plot=do_plot) + + +@particle_tree_test_decorator +def test_two_level_particle_tree(ctx_getter, dtype, dims, do_plot=False): + ctx = ctx_getter() + queue = cl.CommandQueue(ctx) + + from boxtree import TreeBuilder + builder = TreeBuilder(ctx) - # test bi-level corner case run_build_test(builder, queue, dims, - dtype, 50, do_plot=False) + dtype, 50, do_plot=do_plot) + + +@particle_tree_test_decorator +def 
test_unpruned_particle_tree(ctx_getter, dtype, dims, do_plot=False): + ctx = ctx_getter() + queue = cl.CommandQueue(ctx) + + from boxtree import TreeBuilder + builder = TreeBuilder(ctx) # test unpruned tree build run_build_test(builder, queue, dims, dtype, 10**5, - do_plot=False, skip_prune=True) + do_plot=do_plot, skip_prune=True) + + +@particle_tree_test_decorator +def test_particle_tree_with_reallocations(ctx_getter, dtype, dims, do_plot=False): + ctx = ctx_getter() + queue = cl.CommandQueue(ctx) + + from boxtree import TreeBuilder + builder = TreeBuilder(ctx) - # exercise reallocation code run_build_test(builder, queue, dims, dtype, 10**5, - do_plot=False, nboxes_guess=5) + do_plot=do_plot, nboxes_guess=5) + + +@particle_tree_test_decorator +def test_particle_tree_with_many_empty_leaves( + ctx_getter, dtype, dims, do_plot=False): + ctx = ctx_getter() + queue = cl.CommandQueue(ctx) + + from boxtree import TreeBuilder + builder = TreeBuilder(ctx) - # test many empty leaves corner case run_build_test(builder, queue, dims, dtype, 10**5, - do_plot=False, max_particles_in_box=5) + do_plot=do_plot, max_particles_in_box=5) + + +@particle_tree_test_decorator +def test_vanilla_particle_tree(ctx_getter, dtype, dims, do_plot=False): + ctx = ctx_getter() + queue = cl.CommandQueue(ctx) + + from boxtree import TreeBuilder + builder = TreeBuilder(ctx) - # test vanilla tree build run_build_test(builder, queue, dims, dtype, 10**5, do_plot=do_plot) +@particle_tree_test_decorator +def test_non_adaptive_particle_tree(ctx_getter, dtype, dims, do_plot=False): + ctx = ctx_getter() + queue = cl.CommandQueue(ctx) + + from boxtree import TreeBuilder + builder = TreeBuilder(ctx) + + run_build_test(builder, queue, dims, dtype, 10**4, + do_plot=do_plot, non_adaptive=True) + +# }}} + + +# {{{ source/target tree + @pytest.mark.opencl @pytest.mark.parametrize("dims", [2, 3]) def test_source_target_tree(ctx_getter, dims, do_plot=False): @@ -239,6 +304,7 @@ def 
test_source_target_tree(ctx_getter, dims, do_plot=False): import matplotlib.pyplot as pt pt.plot(sources[0].get(), sources[1].get(), "rx") pt.plot(targets[0].get(), targets[1].get(), "g+") + pt.show() from boxtree import TreeBuilder tb = TreeBuilder(ctx)