From 3eb4c0dc02d4eaa05b7ada94316bc3a5a682e9eb Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 29 Oct 2016 18:06:43 -0500 Subject: [PATCH 1/4] Fix area query issues (#7, #8). * To fix #7, we detect when the ball center is outside the bbox and handle appropriately. * To fix #8, we stop looking if the child fails to exist in the guiding box finder. --- boxtree/area_query.py | 136 ++++++++++++++++++++++++++++++++++-------- boxtree/tree_build.py | 3 +- test/test_tree.py | 104 +++++++++++++++++++++++--------- 3 files changed, 187 insertions(+), 56 deletions(-) diff --git a/boxtree/area_query.py b/boxtree/area_query.py index cc58b75..f5b19f9 100644 --- a/boxtree/area_query.py +++ b/boxtree/area_query.py @@ -142,42 +142,122 @@ class LeavesToBallsLookup(DeviceDataRecord): # {{{ kernel templates GUIDING_BOX_FINDER_MACRO = r"""//CL:mako// + <%def name="initialize_coord_vec(vector_name, entries)"> + <% assert len(entries) == dimensions %> + ${vector_name} = (coord_vec_t) (${", ".join(entries)}); + + <%def name="find_guiding_box(ball_center, ball_radius, box='guiding_box')"> box_id_t ${box} = 0; - - // Descend when root is not the guiding box. - if (LEVEL_TO_RAD(0) / 2 >= ${ball_radius}) { - for (unsigned box_level = 0;; ++box_level) + // + // Step 1: Ensure that the center is within the bounding box. + // + coord_vec_t query_center, bbox_min, bbox_max; + + ${initialize_coord_vec( + "bbox_min", ["bbox_min_" + ax for ax in AXIS_NAMES[:dimensions]])} + + // bbox_max should be smaller than the true bounding box, so that + // (query_center - bbox_min) / root_extent entries are in the half open + // interval [0, 1). + bbox_max = bbox_min + (coord_t) ( + root_extent / (1 + ${root_extent_stretch_factor})); + query_center = min(bbox_max, max(bbox_min, ${ball_center})); + + // + // Step 2: Compute the query radius. This can be effectively + // smaller than the original ball radius, if the center + // isn't in the bounding box (see picture): + // + // +-----------+ + // | | + // | | + // | | + // +-------|-+ | + // | | | | + // | +-----------+ <= bounding box + // | | + // | | + // +---------+ <= query box + // + // <---> <= original radius + // <> <= effective radius + // + coord_t query_radius = 0; + + %for mnr in range(2**dimensions): { - if (/* Found leaf? */ - !(box_flags[${box}] & BOX_HAS_CHILDREN) - /* Found guiding box? */ - || (LEVEL_TO_RAD(box_level) / 2 < ${ball_radius} - && ${ball_radius} <= LEVEL_TO_RAD(box_level))) + coord_vec_t offset; + + ${initialize_coord_vec("offset", + ["{sign}ball_radius".format( + sign="+" if ((2**dimensions-1-idim) & mnr) else "-") + for idim in range(dimensions)])} + + coord_vec_t corner = min( + bbox_max, max(bbox_min, ${ball_center} + offset)); + + coord_vec_t dist = fabs(corner - query_center); + for (int i = 0; i < ${dimensions}; ++i) { - break; + query_radius = fmax(query_radius, dist[i]); } + } + %endfor - // Find the child containing the ball center. - // - // Logic intended to match the morton nr scan kernel. + // + // Step 3: Find the guiding box. + // - %for ax in AXIS_NAMES[:dimensions]: - unsigned ${ax}_bits = (unsigned) ( - ((${ball_center}.${ax} - bbox_min_${ax}) / root_extent) - * (1U << (1 + box_level))); - %endfor + // Descend when root is not the guiding box. + if (LEVEL_TO_RAD(0) / 2 >= query_radius) + { + for (unsigned box_level = 0;; ++box_level) + { + if (/* Found leaf? */ + !(box_flags[${box}] & BOX_HAS_CHILDREN) + /* Found guiding box? */ + || (LEVEL_TO_RAD(box_level) / 2 < query_radius + && query_radius <= LEVEL_TO_RAD(box_level))) + { + break; + } - // Pick off the lowest-order bit for each axis, put it in its place. - int level_morton_number = 0 - %for iax, ax in enumerate(AXIS_NAMES[:dimensions]): - | (${ax}_bits & 1U) << (${dimensions-1-iax}) - %endfor - ; + // Find the child containing the ball center. + // + // Logic intended to match the morton nr scan kernel. + + coord_vec_t offset_scaled = + (query_center - bbox_min) / root_extent; + + // Invariant: offset_scaled entries are in [0, 1). + %for iax, ax in enumerate(AXIS_NAMES[:dimensions]): + unsigned ${ax}_bits = (unsigned) ( + offset_scaled[${iax}] * (1U << (1 + box_level))); + %endfor + + // Pick off the lowest-order bit for each axis, put it in + // its place. + int level_morton_number = 0 + %for iax, ax in enumerate(AXIS_NAMES[:dimensions]): + | (${ax}_bits & 1U) << (${dimensions-1-iax}) + %endfor + ; + + box_id_t next_box = box_child_ids[ + level_morton_number * aligned_nboxes + ${box}]; - ${box} = box_child_ids[ - level_morton_number * aligned_nboxes + ${box}]; + if (next_box) + { + ${box} = next_box; + } + else + { + // Child does not exist, this must be the guiding box + break; + } + } } } @@ -445,6 +525,7 @@ class AreaQueryElementwiseTemplate(object): from pyopencl.tools import dtype_to_ctype from boxtree import box_flags_enum from boxtree.traversal import TRAVERSAL_PREAMBLE_TYPEDEFS_AND_DEFINES + from boxtree.tree_build import TreeBuilder render_vars = ( ("dimensions", dimensions), @@ -458,6 +539,7 @@ class AreaQueryElementwiseTemplate(object): ("box_flags_enum", box_flags_enum), ("peer_list_idx_dtype", peer_list_idx_dtype), ("debug", False), + ("root_extent_stretch_factor", TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR), # Not used (but required by TRAVERSAL_PREAMBLE_TEMPLATE) ("stick_out_factor", 0), ) @@ -560,6 +642,7 @@ class AreaQueryBuilder(object): logger.info("start building area query kernel") from boxtree.traversal import TRAVERSAL_PREAMBLE_TEMPLATE + from boxtree.tree_build import TreeBuilder template = Template( TRAVERSAL_PREAMBLE_TEMPLATE @@ -579,6 +662,7 @@ class AreaQueryBuilder(object): peer_list_idx_dtype=peer_list_idx_dtype, ball_id_dtype=ball_id_dtype, debug=False, + root_extent_stretch_factor=TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR, # Not used (but required by TRAVERSAL_PREAMBLE_TEMPLATE) stick_out_factor=0) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 1d3614c..cd665cf 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -56,6 +56,7 @@ class TreeBuilder(object): morton_nr_dtype = np.dtype(np.int8) box_level_dtype = np.dtype(np.uint8) + ROOT_EXTENT_STRETCH_FACTOR = 1e-4 @memoize_method def get_kernel_info(self, dimensions, coord_dtype, @@ -309,7 +310,7 @@ class TreeBuilder(object): root_extent = max( bbox["max_"+ax] - bbox["min_"+ax] - for ax in axis_names) * (1+1e-4) + for ax in axis_names) * (1+TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) # make bbox square and slightly larger at the top, to ensure scaled # coordinates are always < 1 diff --git a/test/test_tree.py b/test/test_tree.py index 15e8f4e..9fc0833 100644 --- a/test/test_tree.py +++ b/test/test_tree.py @@ -658,34 +658,10 @@ def test_leaves_to_balls_query(ctx_getter, dims, do_plot=False): # {{{ area query test -@pytest.mark.opencl -@pytest.mark.area_query -@pytest.mark.parametrize("dims", [2, 3]) -def test_area_query(ctx_getter, dims, do_plot=False): - logging.basicConfig(level=logging.INFO) - - ctx = ctx_getter() - queue = cl.CommandQueue(ctx) - - nparticles = 10**5 - dtype = np.float64 - - particles = make_normal_particle_array(queue, nparticles, dims, dtype) - - if do_plot: - import matplotlib.pyplot as pt - pt.plot(particles[0].get(), particles[1].get(), "x") - - from boxtree import TreeBuilder - tb = TreeBuilder(ctx) - - queue.finish() - tree, _ = tb(queue, particles, max_particles_in_box=30, debug=True) - - nballs = 10**4 - ball_centers = make_normal_particle_array(queue, nballs, dims, dtype) - ball_radii = cl.array.empty(queue, nballs, dtype).fill(0.1) - +def run_area_query_test(ctx, queue, tree, ball_centers, ball_radii): + """ + Performs an area query and checks that the result is as expected. + """ from boxtree.area_query import AreaQueryBuilder aqb = AreaQueryBuilder(ctx) @@ -701,6 +677,7 @@ def test_area_query(ctx_getter, dims, do_plot=False): leaf_boxes, = (tree.box_flags & box_flags_enum.HAS_CHILDREN == 0).nonzero() leaf_box_radii = np.empty(len(leaf_boxes)) + dims = len(tree.sources) leaf_box_centers = np.empty((len(leaf_boxes), dims)) for idx, leaf_box in enumerate(leaf_boxes): @@ -719,7 +696,76 @@ def test_area_query(ctx_getter, dims, do_plot=False): start, end = area_query.leaves_near_ball_starts[ball_nr:ball_nr+2] found = area_query.leaves_near_ball_lists[start:end] actual = near_leaves - assert sorted(found) == sorted(actual) + assert set(found) == set(actual), (found, actual) + + +@pytest.mark.opencl +@pytest.mark.area_query +@pytest.mark.parametrize("dims", [2, 3]) +def test_area_query(ctx_getter, dims, do_plot=False): + ctx = ctx_getter() + queue = cl.CommandQueue(ctx) + + nparticles = 10**5 + dtype = np.float64 + + particles = make_normal_particle_array(queue, nparticles, dims, dtype) + + if do_plot: + import matplotlib.pyplot as pt + pt.plot(particles[0].get(), particles[1].get(), "x") + + from boxtree import TreeBuilder + tb = TreeBuilder(ctx) + + queue.finish() + tree, _ = tb(queue, particles, max_particles_in_box=30, debug=True) + + nballs = 10**4 + ball_centers = make_normal_particle_array(queue, nballs, dims, dtype) + ball_radii = cl.array.empty(queue, nballs, dtype).fill(0.1) + + run_area_query_test(ctx, queue, tree, ball_centers, ball_radii) + + +@pytest.mark.opencl +@pytest.mark.area_query +@pytest.mark.parametrize("dims", [2, 3]) +def test_area_query_balls_outside_bbox(ctx_getter, dims, do_plot=False): + """ + The input to the area query includes balls whose centers are not within + the tree bounding box. + """ + ctx = ctx_getter() + queue = cl.CommandQueue(ctx) + + nparticles = 10**4 + dtype = np.float64 + + particles = make_normal_particle_array(queue, nparticles, dims, dtype) + + if do_plot: + import matplotlib.pyplot as pt + pt.plot(particles[0].get(), particles[1].get(), "x") + + from boxtree import TreeBuilder + tb = TreeBuilder(ctx) + + queue.finish() + tree, _ = tb(queue, particles, max_particles_in_box=30, debug=True) + + nballs = 10**4 + from pyopencl.clrandom import PhiloxGenerator + rng = PhiloxGenerator(ctx, seed=13) + bbox_min = tree.bounding_box[0].min() + bbox_max = tree.bounding_box[1].max() + from pytools.obj_array import make_obj_array + ball_centers = make_obj_array([ + rng.uniform(queue, nballs, dtype=dtype, a=bbox_min-1, b=bbox_max+1) + for i in range(dims)]) + ball_radii = cl.array.empty(queue, nballs, dtype).fill(0.1) + + run_area_query_test(ctx, queue, tree, ball_centers, ball_radii) @pytest.mark.opencl -- GitLab From d16e16fe6ede13e2f9b321943f0c6912e6d4584c Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 29 Oct 2016 18:10:23 -0500 Subject: [PATCH 2/4] Area query: change area query template so that leaf_found_op gets run only when an overlapping leaf box is found. Previously, leaf_found_op got run whenever a candidate leaf box for overlap was found, and leaf_found_op was responsible for checking overlap. This change allows leaf_found_op to assume that all the leaves given to it are part of the area query, which simplifies the task inside leaf_found_op. --- boxtree/area_query.py | 63 +++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/boxtree/area_query.py b/boxtree/area_query.py index f5b19f9..a5c7f7f 100644 --- a/boxtree/area_query.py +++ b/boxtree/area_query.py @@ -286,7 +286,15 @@ AREA_QUERY_WALKER_BODY = r""" if (!(box_flags[peer_box] & BOX_HAS_CHILDREN)) { - ${leaf_found_op("peer_box", "ball_center", "ball_radius")} + bool is_overlapping; + + ${check_l_infty_ball_overlap( + "is_overlapping", "peer_box", "ball_radius", "ball_center")} + + if (is_overlapping) + { + ${leaf_found_op("peer_box", "ball_center", "ball_radius")} + } } else { @@ -301,8 +309,17 @@ AREA_QUERY_WALKER_BODY = r""" { if (!(box_flags[child_box_id] & BOX_HAS_CHILDREN)) { - ${leaf_found_op("child_box_id", "ball_center", - "ball_radius")} + bool is_overlapping; + + ${check_l_infty_ball_overlap( + "is_overlapping", "child_box_id", + "ball_radius", "ball_center")} + + if (is_overlapping) + { + ${leaf_found_op( + "child_box_id", "ball_center", "ball_radius")} + } } else { @@ -333,17 +350,7 @@ AREA_QUERY_TEMPLATE = ( <%def name="leaf_found_op(leaf_box_id, ball_center, ball_radius)"> - { - bool is_overlapping; - - ${check_l_infty_ball_overlap( - "is_overlapping", leaf_box_id, ball_radius, ball_center)} - - if (is_overlapping) - { - APPEND_leaves(${leaf_box_id}); - } - } + APPEND_leaves(${leaf_box_id}); void generate(LIST_ARG_DECL USER_ARG_DECL ball_id_t i) @@ -586,9 +593,6 @@ SPACE_INVADER_QUERY_TEMPLATE = AreaQueryElementwiseTemplate( leaf_found_op=r""" { ${load_center("leaf_center", leaf_box_id)} - int leaf_level = box_levels[${leaf_box_id}]; - - coord_t size_sum = LEVEL_TO_RAD(leaf_level) + ${ball_radius}; coord_t max_dist = 0; %for i in range(dimensions): @@ -596,21 +600,16 @@ SPACE_INVADER_QUERY_TEMPLATE = AreaQueryElementwiseTemplate( distance(${ball_center}.s${i}, leaf_center.s${i})); %endfor - bool is_overlapping = max_dist <= size_sum; - - if (is_overlapping) - { - // The atomic max operation supports only integer types. - // However, max_dist is of a floating point type. - // For comparison purposes we reinterpret the bits of max_dist - // as an integer. The comparison result is the same as for positive - // IEEE floating point numbers, so long as the float/int endianness - // matches (fingers crossed). - atomic_max( - (volatile __global int *) - &outer_space_invader_dists[${leaf_box_id}], - as_int((float) max_dist)); - } + // The atomic max operation supports only integer types. + // However, max_dist is of a floating point type. + // For comparison purposes we reinterpret the bits of max_dist + // as an integer. The comparison result is the same as for positive + // IEEE floating point numbers, so long as the float/int endianness + // matches (fingers crossed). + atomic_max( + (volatile __global int *) + &outer_space_invader_dists[${leaf_box_id}], + as_int((float) max_dist)); }""", name="space_invader_query") -- GitLab From 932400a0e4daf1adad5f8f9d01eacc7238886381 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 29 Oct 2016 18:26:29 -0500 Subject: [PATCH 3/4] Fix vector subscripts to make the CUDA compiler happy. --- boxtree/area_query.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/boxtree/area_query.py b/boxtree/area_query.py index a5c7f7f..4e8ae3e 100644 --- a/boxtree/area_query.py +++ b/boxtree/area_query.py @@ -199,10 +199,9 @@ GUIDING_BOX_FINDER_MACRO = r"""//CL:mako// bbox_max, max(bbox_min, ${ball_center} + offset)); coord_vec_t dist = fabs(corner - query_center); - for (int i = 0; i < ${dimensions}; ++i) - { - query_radius = fmax(query_radius, dist[i]); - } + %for i in range(dimensions): + query_radius = fmax(query_radius, dist.s${i}); + %endfor } %endfor @@ -232,9 +231,9 @@ GUIDING_BOX_FINDER_MACRO = r"""//CL:mako// (query_center - bbox_min) / root_extent; // Invariant: offset_scaled entries are in [0, 1). - %for iax, ax in enumerate(AXIS_NAMES[:dimensions]): + %for ax in AXIS_NAMES[:dimensions]: unsigned ${ax}_bits = (unsigned) ( - offset_scaled[${iax}] * (1U << (1 + box_level))); + offset_scaled.${ax} * (1U << (1 + box_level))); %endfor // Pick off the lowest-order bit for each axis, put it in -- GitLab From 7ac16e510a5a3b28b9fe9c44620235c691604680 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 29 Oct 2016 18:33:32 -0500 Subject: [PATCH 4/4] Fix misplaced parentheses --- boxtree/area_query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/area_query.py b/boxtree/area_query.py index 4e8ae3e..9548c67 100644 --- a/boxtree/area_query.py +++ b/boxtree/area_query.py @@ -192,7 +192,7 @@ GUIDING_BOX_FINDER_MACRO = r"""//CL:mako// ${initialize_coord_vec("offset", ["{sign}ball_radius".format( - sign="+" if ((2**dimensions-1-idim) & mnr) else "-") + sign="+" if (2**(dimensions-1-idim) & mnr) else "-") for idim in range(dimensions)])} coord_vec_t corner = min( -- GitLab