From 6b11a0471525bcfa999f2b75c800bad9c77c7a4f Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 2 Sep 2016 15:26:37 -0500 Subject: [PATCH 1/8] Add space invader query (box -> distance to furthest overlapping center). --- boxtree/area_query.py | 394 +++++++++++++++++++++++++++++++++++------- test/test_tree.py | 86 ++++++++- 2 files changed, 409 insertions(+), 71 deletions(-) diff --git a/boxtree/area_query.py b/boxtree/area_query.py index 2742a26..9c08b0b 100644 --- a/boxtree/area_query.py +++ b/boxtree/area_query.py @@ -54,6 +54,12 @@ Inverse of area query (Leaves -> overlapping balls) .. autoclass:: LeavesToBallsLookup +Space invader queries +^^^^^^^^^^^^^^^^^^^^^ + +.. autoclass:: SpaceInvaderQueryBuilder + + Peer Lists ^^^^^^^^^^ @@ -134,7 +140,75 @@ class LeavesToBallsLookup(DeviceDataRecord): # {{{ kernel templates -AREA_QUERY_TEMPLATE = r"""//CL// +GUIDING_BOX_FINDER_MACRO = r"""//CL:mako// + <%def name="find_guiding_box(ball_center, ball_radius)"> + ${walk_init(0)} + box_id_t guiding_box; + + if (LEVEL_TO_RAD(0) < ball_radius / 2 || !(box_flags[0] & BOX_HAS_CHILDREN)) + { + guiding_box = 0; + continue_walk = false; + } + + while (continue_walk) + { + // Get the next child. + box_id_t child_box_id = box_child_ids[ + walk_morton_nr * aligned_nboxes + walk_box_id]; + + bool last_child = walk_morton_nr == ${2**dimensions - 1}; + + if (child_box_id) + { + bool contains_ball_center; + int child_level = walk_level + 1; + coord_t child_rad = LEVEL_TO_RAD(child_level); + + { + // Check if the child contains the ball's center. + ${load_center("child_center", "child_box_id")} + + coord_t max_dist = 0; + %for i in range(dimensions): + max_dist = fmax(max_dist, + fabs(ball_center.s${i} - child_center.s${i})); + %endfor + + contains_ball_center = max_dist <= child_rad; + } + + if (contains_ball_center) + { + if ((child_rad / 2 < ball_radius && ball_radius <= child_rad) || + !(box_flags[child_box_id] & BOX_HAS_CHILDREN)) + { + guiding_box = child_box_id; + break; + } + + // We want to descend into this box. Put the current state + // on the stack. + ${walk_push("child_box_id")} + continue; + } + } + + if (last_child) + { + // This box has no children that contain the center, so it must + // be the guiding box. + guiding_box = walk_box_id; + break; + } + + ${walk_advance()} + } + +""" + + +AREA_QUERY_TEMPLATE = GUIDING_BOX_FINDER_MACRO + r"""//CL// typedef ${dtype_to_ctype(ball_id_dtype)} ball_id_t; typedef ${dtype_to_ctype(peer_list_idx_dtype)} peer_list_idx_t; @@ -165,74 +239,12 @@ void generate(LIST_ARG_DECL USER_ARG_DECL ball_id_t ball_nr) // Step 1: Find the guiding box. // /////////////////////////////////// - ${walk_init(0)} - box_id_t guiding_box; - - if (LEVEL_TO_RAD(0) < ball_radius / 2 || !(box_flags[0] & BOX_HAS_CHILDREN)) - { - guiding_box = 0; - continue_walk = false; - } - - while (continue_walk) - { - // Get the next child. - box_id_t child_box_id = box_child_ids[ - walk_morton_nr * aligned_nboxes + walk_box_id]; - - bool last_child = walk_morton_nr == ${2**dimensions - 1}; - - if (child_box_id) - { - bool contains_ball_center; - int child_level = walk_level + 1; - coord_t child_rad = LEVEL_TO_RAD(child_level); - - { - // Check if the child contains the ball's center. - ${load_center("child_center", "child_box_id")} - - coord_t max_dist = 0; - %for i in range(dimensions): - max_dist = fmax(max_dist, - fabs(ball_center.s${i} - child_center.s${i})); - %endfor - - contains_ball_center = max_dist <= child_rad; - } - - if (contains_ball_center) - { - if ((child_rad / 2 < ball_radius && ball_radius <= child_rad) || - !(box_flags[child_box_id] & BOX_HAS_CHILDREN)) - { - guiding_box = child_box_id; - break; - } - - // We want to descend into this box. Put the current state - // on the stack. - ${walk_push("child_box_id")} - continue; - } - } - - if (last_child) - { - // This box has no children that contain the center, so it must - // be the guiding box. - guiding_box = walk_box_id; - break; - } - - ${walk_advance()} - } + ${find_guiding_box("ball_center", "ball_radius")} ////////////////////////////////////////////////////// // Step 2 - Walk the peer boxes to find the leaves. // ////////////////////////////////////////////////////// - for (peer_list_idx_t pb_i = peer_list_starts[guiding_box], pb_e = peer_list_starts[guiding_box+1]; pb_i < pb_e; ++pb_i) { @@ -394,6 +406,99 @@ STARTS_EXPANDER_TEMPLATE = ElementwiseTemplate( """, name="starts_expander") + +SPACE_INVADER_QUERY_TEMPLATE = GUIDING_BOX_FINDER_MACRO + r"""//CL:mako// + <%def name="check_if_found_outer_space_invader(box_id)"> + { + ${load_center("box_center", box_id)} + int box_level = box_levels[${box_id}]; + + coord_t size_sum = LEVEL_TO_RAD(box_level) + ball_radius; + + coord_t max_dist = 0; + %for i in range(dimensions): + max_dist = fmax(max_dist, + fabs(ball_center.s${i} - box_center.s${i})); + %endfor + + bool is_overlapping = max_dist <= size_sum; + + if (is_overlapping) + { + // The atomic max operation supports only integer types. + // However, max_dist is of a floating point type. + // For comparison purposes we reinterpret the bits of max_dist + // as an integer. The comparison result is the same as for positive + // IEEE floating point numbers, so long as the float/int endianness + // matches (fingers crossed). + atomic_max( + (volatile __global int *) &outer_space_invader_dists[${box_id}], + as_int((float) max_dist)); + } + } + + + typedef ${dtype_to_ctype(ball_id_dtype)} ball_id_t; + typedef ${dtype_to_ctype(peer_list_idx_dtype)} peer_list_idx_t; + + ball_id_t ball_nr = i; + + coord_vec_t ball_center; + %for i in range(dimensions): + ball_center.${AXIS_NAMES[i]} = ball_${AXIS_NAMES[i]}[ball_nr]; + %endfor + + coord_t ball_radius = ball_radii[ball_nr]; + + /////////////////////////////////// + // Step 1: Find the guiding box. // + /////////////////////////////////// + + ${find_guiding_box("ball_center", "ball_radius")} + + ////////////////////////////////////////////////////// + // Step 2 - Walk the peer boxes to find the leaves. // + ////////////////////////////////////////////////////// + + for (peer_list_idx_t pb_i = peer_list_starts[guiding_box], + pb_e = peer_list_starts[guiding_box+1]; pb_i < pb_e; ++pb_i) + { + box_id_t peer_box = peer_lists[pb_i]; + + if (!(box_flags[peer_box] & BOX_HAS_CHILDREN)) + { + ${check_if_found_outer_space_invader("peer_box")} + } + else + { + ${walk_reset("peer_box")} + + while (continue_walk) + { + box_id_t child_box_id = box_child_ids[ + walk_morton_nr * aligned_nboxes + walk_box_id]; + + if (child_box_id) + { + if (!(box_flags[child_box_id] & BOX_HAS_CHILDREN)) + { + ${check_if_found_outer_space_invader("child_box_id")} + } + else + { + // We want to descend into this box. Put the current state + // on the stack. + ${walk_push("child_box_id")} + continue; + } + } + + ${walk_advance()} + } + } + } +""" + # }}} @@ -540,11 +645,11 @@ class AreaQueryBuilder(object): leaves_near_ball_starts=result["leaves"].starts, leaves_near_ball_lists=result["leaves"].lists).with_queue(None), evt - # }}} # {{{ area query transpose (leaves-to-balls) lookup build + class LeavesToBallsLookupBuilder(object): """Given a set of :math:`l^\infty` "balls", this class helps build a look-up table from leaf boxes to balls that overlap with each leaf box. @@ -641,6 +746,165 @@ class LeavesToBallsLookupBuilder(object): balls_near_box_starts=balls_near_box_starts, balls_near_box_lists=balls_near_box_lists).with_queue(None), evt +# }}} + +# {{{ space invader query build + +class SpaceInvaderQueryBuilder(object): + """Given a set of :math:`l^\infty` "balls", this class helps build a look-up + table from leaf box to max center-to-center distance with an overlapping + ball. + + .. automethod:: __call__ + + """ + def __init__(self, context): + self.context = context + self.peer_list_finder = PeerListFinder(self.context) + + # {{{ Kernel generation + + @memoize_method + def get_space_invader_query_kernel(self, dimensions, coord_dtype, + box_id_dtype, ball_id_dtype, peer_list_idx_dtype, max_levels): + from pyopencl.tools import dtype_to_ctype + from boxtree import box_flags_enum + + logger.info("start building space invader query kernel") + + from boxtree.traversal import ( + TRAVERSAL_PREAMBLE_MAKO_DEFS, + TRAVERSAL_PREAMBLE_TYPEDEFS_AND_DEFINES) + + template = Template( + TRAVERSAL_PREAMBLE_MAKO_DEFS + + SPACE_INVADER_QUERY_TEMPLATE, + strict_undefined=True) + + preamble = Template(TRAVERSAL_PREAMBLE_TYPEDEFS_AND_DEFINES, + strict_undefined=True) + + render_vars = dict( + dimensions=dimensions, + dtype_to_ctype=dtype_to_ctype, + box_id_dtype=box_id_dtype, + particle_id_dtype=None, + coord_dtype=coord_dtype, + vec_types=cl.array.vec.types, + max_levels=max_levels, + AXIS_NAMES=AXIS_NAMES, + box_flags_enum=box_flags_enum, + peer_list_idx_dtype=peer_list_idx_dtype, + ball_id_dtype=ball_id_dtype, + debug=False, + # Not used (but required by TRAVERSAL_PREAMBLE_TEMPLATE) + stick_out_factor=0) + + from pyopencl.tools import VectorArg, ScalarArg + arg_decls = [ + VectorArg(coord_dtype, "box_centers"), + ScalarArg(coord_dtype, "root_extent"), + VectorArg(np.uint8, "box_levels"), + ScalarArg(box_id_dtype, "aligned_nboxes"), + VectorArg(box_id_dtype, "box_child_ids"), + VectorArg(box_flags_enum.dtype, "box_flags"), + VectorArg(peer_list_idx_dtype, "peer_list_starts"), + VectorArg(box_id_dtype, "peer_lists"), + VectorArg(coord_dtype, "ball_radii"), + VectorArg(np.float32, "outer_space_invader_dists"), + ] + [ + VectorArg(coord_dtype, "ball_"+ax) + for ax in AXIS_NAMES[:dimensions]] + + from pyopencl.elementwise import ElementwiseKernel + space_invader_query_kernel = ElementwiseKernel( + self.context, + arguments=arg_decls, + operation=str(template.render(**render_vars)), + name="space_invaders_query", + preamble=preamble.render(**render_vars)) + + logger.info("done building space invader query kernel") + return space_invader_query_kernel + + # }}} + + def __call__(self, queue, tree, ball_centers, ball_radii, peer_lists=None, + wait_for=None): + """:arg queue: a :class:`pyopencl.CommandQueue` + :arg tree: a :class:`boxtree.Tree`. + :arg ball_centers: an object array of coordinate + :class:`pyopencl.array.Array` instances. + Their *dtype* must match *tree*'s + :attr:`boxtree.Tree.coord_dtype`. + :arg ball_radii: a + :class:`pyopencl.array.Array` + of positive numbers. + Its *dtype* must match *tree*'s + :attr:`boxtree.Tree.coord_dtype`. + :arg peer_lists: may either be *None* or an instance of + :class:`PeerListLookup` associated with `tree`. + :arg wait_for: may either be *None* or a list of :class:`pyopencl.Event` + instances for whose completion this command waits before starting + execution. + :returns: a tuple *(sqi, event)*, where *sqi* is an instance of + :class:`pyopencl.array`, and *event* is a :class:`pyopencl.Event` + for dependency management. + """ + + from pytools import single_valued + if single_valued(bc.dtype for bc in ball_centers) != tree.coord_dtype: + raise TypeError("ball_centers dtype must match tree.coord_dtype") + if ball_radii.dtype != tree.coord_dtype: + raise TypeError("ball_radii dtype must match tree.coord_dtype") + + ball_id_dtype = tree.particle_id_dtype # ? + + from pytools import div_ceil + # Avoid generating too many kernels. + max_levels = div_ceil(tree.nlevels, 10) * 10 + + if peer_lists is None: + peer_lists, evt = self.peer_list_finder(queue, tree, wait_for=wait_for) + wait_for = [evt] + + if len(peer_lists.peer_list_starts) != tree.nboxes + 1: + raise ValueError("size of peer lists must match with number of boxes") + + space_invader_query_kernel = self.get_space_invader_query_kernel( + tree.dimensions, tree.coord_dtype, tree.box_id_dtype, ball_id_dtype, + peer_lists.peer_list_starts.dtype, max_levels) + + logger.info("space invader query: run space invader query") + + outer_space_invader_dists = cl.array.zeros(queue, tree.nboxes, np.float32) + wait_for.extend(outer_space_invader_dists.events) + + evt = space_invader_query_kernel( + tree.box_centers, tree.root_extent, + tree.box_levels, tree.aligned_nboxes, + tree.box_child_ids, tree.box_flags, + peer_lists.peer_list_starts, + peer_lists.peer_lists, + ball_radii, + outer_space_invader_dists, + *tuple(bc for bc in ball_centers), + wait_for=wait_for, + queue=queue, + slice=slice(len(ball_radii))) + + if tree.coord_dtype != np.dtype(np.float32): + # The kernel output is always an array of float32 due to limited + # support for atomic operations with float64 in OpenCL. + # Here the output is cast to match the coord dtype. + outer_space_invader_dists.finish() + outer_space_invader_dists = outer_space_invader_dists.astype( + tree.coord_dtype) + evt, = outer_space_invader_dists.events + + logger.info("space invader query: done") + + return outer_space_invader_dists, evt # }}} diff --git a/test/test_tree.py b/test/test_tree.py index 18210aa..5e8bb3b 100644 --- a/test/test_tree.py +++ b/test/test_tree.py @@ -308,12 +308,8 @@ def test_explicit_refine_weights_particle_tree(ctx_getter, dtype, dims, nparticles = 10**5 from pyopencl.clrandom import PhiloxGenerator - import random - random.seed(10) - rng = PhiloxGenerator(ctx) - refine_weights = cl.array.empty(queue, nparticles, np.int32) - evt = rng.fill_uniform(refine_weights, a=1, b=10) - cl.wait_for_events([evt]) + rng = PhiloxGenerator(ctx, seed=10) + refine_weights = rng.uniform(queue, nparticles, dtype=np.int32, a=1, b=10) run_build_test(builder, queue, dims, dtype, nparticles, refine_weights=refine_weights, max_leaf_refine_weight=100, @@ -728,6 +724,84 @@ def test_area_query(ctx_getter, dims, do_plot=False): # }}} +# {{{ space invader query test + +@pytest.mark.opencl +@pytest.mark.geo_lookup +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("dims", [2, 3]) +def test_space_invader_query(ctx_getter, dims, dtype, do_plot=False): + logging.basicConfig(level=logging.INFO) + + ctx = ctx_getter() + queue = cl.CommandQueue(ctx) + + if (dtype == np.float32 + and dims == 2 + and queue.device.platform.name == "Portable Computing Language"): + # arg list lenghts disagree + pytest.xfail("2D float doesn't work on POCL") + + dtype = np.dtype(dtype) + nparticles = 10**5 + + particles = make_normal_particle_array(queue, nparticles, dims, dtype) + + if do_plot: + import matplotlib.pyplot as pt + pt.plot(particles[0].get(), particles[1].get(), "x") + + from boxtree import TreeBuilder + tb = TreeBuilder(ctx) + + queue.finish() + tree, _ = tb(queue, particles, max_particles_in_box=30, debug=True) + + nballs = 10**4 + ball_centers = make_normal_particle_array(queue, nballs, dims, dtype) + ball_radii = cl.array.empty(queue, nballs, dtype).fill(0.1) + + from boxtree.area_query import ( + LeavesToBallsLookupBuilder, SpaceInvaderQueryBuilder) + + siqb = SpaceInvaderQueryBuilder(ctx) + # We can use leaves-to-balls lookup to get the set of overlapping balls for + # each box, and from there to compute the outer space invader distance. + lblb = LeavesToBallsLookupBuilder(ctx) + + siq, _ = siqb(queue, tree, ball_centers, ball_radii) + lbl, _ = lblb(queue, tree, ball_centers, ball_radii) + + # get data to host for test + tree = tree.get(queue=queue) + siq = siq.get(queue=queue) + lbl = lbl.get(queue=queue) + + ball_centers = np.array([x.get() for x in ball_centers]) + ball_radii = ball_radii.get() + + # Find leaf boxes. + from boxtree import box_flags_enum + + outer_space_invader_dist = np.zeros(tree.nboxes) + + for ibox in range(tree.nboxes): + # We only want leaves here. + if tree.box_flags[ibox] & box_flags_enum.HAS_CHILDREN: + continue + + start, end = lbl.balls_near_box_starts[ibox:ibox + 2] + space_invaders = lbl.balls_near_box_lists[start:end] + if len(space_invaders) > 0: + outer_space_invader_dist[ibox] = np.max(np.abs( + tree.box_centers[:, ibox].reshape((-1, 1)) + - ball_centers[:, space_invaders])) + + assert np.allclose(siq, outer_space_invader_dist) + +# }}} + + # {{{ level restriction test @pytest.mark.opencl -- GitLab From 5c04e2a9d4a9220ea03f1b444775546bcddd1bb6 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 7 Sep 2016 19:38:37 -0500 Subject: [PATCH 2/8] Implement AreaQueryElementwiseTemplate; remove redundant code. --- boxtree/area_query.py | 321 ++++++++++++++++++++++-------------------- boxtree/traversal.py | 4 +- 2 files changed, 170 insertions(+), 155 deletions(-) diff --git a/boxtree/area_query.py b/boxtree/area_query.py index 9c08b0b..c67e8e2 100644 --- a/boxtree/area_query.py +++ b/boxtree/area_query.py @@ -208,32 +208,12 @@ GUIDING_BOX_FINDER_MACRO = r"""//CL:mako// """ -AREA_QUERY_TEMPLATE = GUIDING_BOX_FINDER_MACRO + r"""//CL// -typedef ${dtype_to_ctype(ball_id_dtype)} ball_id_t; -typedef ${dtype_to_ctype(peer_list_idx_dtype)} peer_list_idx_t; - -<%def name="add_box_to_list_if_overlaps_ball(box_id)"> - { - bool is_overlapping; - - ${check_l_infty_ball_overlap( - "is_overlapping", box_id, "ball_radius", "ball_center")} - - if (is_overlapping) - { - APPEND_leaves(${box_id}); - } - } - - -void generate(LIST_ARG_DECL USER_ARG_DECL ball_id_t ball_nr) -{ +AREA_QUERY_WALKER_BODY = r""" coord_vec_t ball_center; - %for i in range(dimensions): - ball_center.${AXIS_NAMES[i]} = ball_${AXIS_NAMES[i]}[ball_nr]; - %endfor + ${get_ball_center("ball_center", "i")} - coord_t ball_radius = ball_radii[ball_nr]; + coord_t ball_radius; + ${get_ball_radius("ball_radius", "i")} /////////////////////////////////// // Step 1: Find the guiding box. // @@ -252,7 +232,7 @@ void generate(LIST_ARG_DECL USER_ARG_DECL ball_id_t ball_nr) if (!(box_flags[peer_box] & BOX_HAS_CHILDREN)) { - ${add_box_to_list_if_overlaps_ball("peer_box")} + ${leaf_found_op("peer_box", "ball_center", "ball_radius")} } else { @@ -267,7 +247,8 @@ void generate(LIST_ARG_DECL USER_ARG_DECL ball_id_t ball_nr) { if (!(box_flags[child_box_id] & BOX_HAS_CHILDREN)) { - ${add_box_to_list_if_overlaps_ball("child_box_id")} + ${leaf_found_op("child_box_id", "ball_center", + "ball_radius")} } else { @@ -282,9 +263,48 @@ void generate(LIST_ARG_DECL USER_ARG_DECL ball_id_t ball_nr) } } } -} """ + +AREA_QUERY_TEMPLATE = ( + GUIDING_BOX_FINDER_MACRO + r"""//CL// + typedef ${dtype_to_ctype(ball_id_dtype)} ball_id_t; + typedef ${dtype_to_ctype(peer_list_idx_dtype)} peer_list_idx_t; + + <%def name="get_ball_center(ball_center, i)"> + %for ax in AXIS_NAMES[:dimensions]: + ${ball_center}.${ax} = ball_${ax}[${i}]; + %endfor + + + <%def name="get_ball_radius(ball_radius, i)"> + ${ball_radius} = ball_radii[${i}]; + + + <%def name="leaf_found_op(leaf_box_id, ball_center, ball_radius)"> + { + bool is_overlapping; + + ${check_l_infty_ball_overlap( + "is_overlapping", leaf_box_id, ball_radius, ball_center)} + + if (is_overlapping) + { + APPEND_leaves(${leaf_box_id}); + } + } + + + void generate(LIST_ARG_DECL USER_ARG_DECL ball_id_t ball_nr) + { + ball_id_t i = ball_nr; + """ + + AREA_QUERY_WALKER_BODY + + """ + } + """) + + PEER_LIST_FINDER_TEMPLATE = r"""//CL// void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t box_id) @@ -407,18 +427,119 @@ STARTS_EXPANDER_TEMPLATE = ElementwiseTemplate( name="starts_expander") -SPACE_INVADER_QUERY_TEMPLATE = GUIDING_BOX_FINDER_MACRO + r"""//CL:mako// - <%def name="check_if_found_outer_space_invader(box_id)"> +class AreaQueryElementwiseTemplate(object): + # FIXME: Document. + + def __init__(self, extra_args, ball_center_expr, ball_radius_expr, + leaf_found_op, name="area_query_elwise"): + + def wrap_in_macro(decl, expr): + return """ + <%def name=\"{decl}\"> + {expr} + + """.format(decl=decl, expr=expr) + + from boxtree.traversal import TRAVERSAL_PREAMBLE_MAKO_DEFS + + self.elwise_template = ElementwiseTemplate( + arguments=r"""//CL:mako// + coord_t *box_centers, + coord_t root_extent, + box_level_t *box_levels, + box_id_t aligned_nboxes, + box_id_t *box_child_ids, + box_flags_t *box_flags, + peer_list_idx_t *peer_list_starts, + box_id_t *peer_lists, + """ + extra_args, + operation="//CL:mako//\n" + + wrap_in_macro("get_ball_center(ball_center, i)", ball_center_expr) + + wrap_in_macro("get_ball_radius(ball_radius, i)", ball_radius_expr) + + wrap_in_macro("leaf_found_op(leaf_box_id, ball_center, ball_radius)", + leaf_found_op) + + TRAVERSAL_PREAMBLE_MAKO_DEFS + + GUIDING_BOX_FINDER_MACRO + + AREA_QUERY_WALKER_BODY, + name=name) + + def generate(self, context, + dimensions, coord_dtype, box_id_dtype, ball_id_dtype, + peer_list_idx_dtype, max_levels, + extra_var_values=(), extra_type_aliases=(), + extra_preamble=""): + from pyopencl.tools import dtype_to_ctype + from boxtree import box_flags_enum + from boxtree.traversal import TRAVERSAL_PREAMBLE_TYPEDEFS_AND_DEFINES + + render_vars = ( + ("dimensions", dimensions), + ("dtype_to_ctype", dtype_to_ctype), + ("box_id_dtype", box_id_dtype), + ("particle_id_dtype", None), + ("coord_dtype", coord_dtype), + ("vec_types", tuple(cl.array.vec.types.items())), + ("max_levels", max_levels), + ("AXIS_NAMES", AXIS_NAMES), + ("box_flags_enum", box_flags_enum), + ("peer_list_idx_dtype", peer_list_idx_dtype), + ("ball_id_dtype", ball_id_dtype), + ("debug", False), + # Not used (but required by TRAVERSAL_PREAMBLE_TEMPLATE) + ("stick_out_factor", 0), + ) + + preamble = Template( + # HACK: box_flags_t and coord_t are defined here and + # in the template below, so disable typedef redifinition warnings. + """ + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wtypedef-redefinition" + """ + + TRAVERSAL_PREAMBLE_TYPEDEFS_AND_DEFINES + + """ + #pragma clang diagnostic pop + """, + strict_undefined=True).render(**dict(render_vars)) + + return self.elwise_template.build(context, + type_aliases=( + ("coord_t", coord_dtype), + ("box_id_t", box_id_dtype), + ("ball_id_t", ball_id_dtype), + ("peer_list_idx_t", peer_list_idx_dtype), + ("box_level_t", np.uint8), + ("box_flags_t", box_flags_enum.dtype), + ) + extra_type_aliases, + var_values=render_vars + extra_var_values, + more_preamble=preamble + extra_preamble) + + +SPACE_INVADER_QUERY_TEMPLATE = AreaQueryElementwiseTemplate( + extra_args=""" + coord_t *ball_radii, + float *outer_space_invader_dists, + %for ax in AXIS_NAMES[:dimensions]: + coord_t *ball_${ax}, + %endfor + """, + ball_radius_expr="${ball_radius} = ball_radii[${i}];", + ball_center_expr=r""" + %for ax in AXIS_NAMES[:dimensions]: + ${ball_center}.${ax} = ball_${ax}[${i}]; + %endfor + """, + leaf_found_op=r""" { - ${load_center("box_center", box_id)} - int box_level = box_levels[${box_id}]; + ${load_center("leaf_center", leaf_box_id)} + int leaf_level = box_levels[${leaf_box_id}]; - coord_t size_sum = LEVEL_TO_RAD(box_level) + ball_radius; + coord_t size_sum = LEVEL_TO_RAD(leaf_level) + ${ball_radius}; coord_t max_dist = 0; %for i in range(dimensions): max_dist = fmax(max_dist, - fabs(ball_center.s${i} - box_center.s${i})); + fabs(${ball_center}.s${i} - leaf_center.s${i})); %endfor bool is_overlapping = max_dist <= size_sum; @@ -432,72 +553,13 @@ SPACE_INVADER_QUERY_TEMPLATE = GUIDING_BOX_FINDER_MACRO + r"""//CL:mako// // IEEE floating point numbers, so long as the float/int endianness // matches (fingers crossed). atomic_max( - (volatile __global int *) &outer_space_invader_dists[${box_id}], + (volatile __global int *) + &outer_space_invader_dists[${leaf_box_id}], as_int((float) max_dist)); } } - - - typedef ${dtype_to_ctype(ball_id_dtype)} ball_id_t; - typedef ${dtype_to_ctype(peer_list_idx_dtype)} peer_list_idx_t; - - ball_id_t ball_nr = i; - - coord_vec_t ball_center; - %for i in range(dimensions): - ball_center.${AXIS_NAMES[i]} = ball_${AXIS_NAMES[i]}[ball_nr]; - %endfor - - coord_t ball_radius = ball_radii[ball_nr]; - - /////////////////////////////////// - // Step 1: Find the guiding box. // - /////////////////////////////////// - - ${find_guiding_box("ball_center", "ball_radius")} - - ////////////////////////////////////////////////////// - // Step 2 - Walk the peer boxes to find the leaves. // - ////////////////////////////////////////////////////// - - for (peer_list_idx_t pb_i = peer_list_starts[guiding_box], - pb_e = peer_list_starts[guiding_box+1]; pb_i < pb_e; ++pb_i) - { - box_id_t peer_box = peer_lists[pb_i]; - - if (!(box_flags[peer_box] & BOX_HAS_CHILDREN)) - { - ${check_if_found_outer_space_invader("peer_box")} - } - else - { - ${walk_reset("peer_box")} - - while (continue_walk) - { - box_id_t child_box_id = box_child_ids[ - walk_morton_nr * aligned_nboxes + walk_box_id]; - - if (child_box_id) - { - if (!(box_flags[child_box_id] & BOX_HAS_CHILDREN)) - { - ${check_if_found_outer_space_invader("child_box_id")} - } - else - { - // We want to descend into this box. Put the current state - // on the stack. - ${walk_push("child_box_id")} - continue; - } - } - - ${walk_advance()} - } - } - } -""" + """, + name="space_invader_query") # }}} @@ -750,6 +812,7 @@ class LeavesToBallsLookupBuilder(object): # {{{ space invader query build + class SpaceInvaderQueryBuilder(object): """Given a set of :math:`l^\infty` "balls", this class helps build a look-up table from leaf box to max center-to-center distance with an overlapping @@ -767,65 +830,14 @@ class SpaceInvaderQueryBuilder(object): @memoize_method def get_space_invader_query_kernel(self, dimensions, coord_dtype, box_id_dtype, ball_id_dtype, peer_list_idx_dtype, max_levels): - from pyopencl.tools import dtype_to_ctype - from boxtree import box_flags_enum - - logger.info("start building space invader query kernel") - - from boxtree.traversal import ( - TRAVERSAL_PREAMBLE_MAKO_DEFS, - TRAVERSAL_PREAMBLE_TYPEDEFS_AND_DEFINES) - - template = Template( - TRAVERSAL_PREAMBLE_MAKO_DEFS - + SPACE_INVADER_QUERY_TEMPLATE, - strict_undefined=True) - - preamble = Template(TRAVERSAL_PREAMBLE_TYPEDEFS_AND_DEFINES, - strict_undefined=True) - - render_vars = dict( - dimensions=dimensions, - dtype_to_ctype=dtype_to_ctype, - box_id_dtype=box_id_dtype, - particle_id_dtype=None, - coord_dtype=coord_dtype, - vec_types=cl.array.vec.types, - max_levels=max_levels, - AXIS_NAMES=AXIS_NAMES, - box_flags_enum=box_flags_enum, - peer_list_idx_dtype=peer_list_idx_dtype, - ball_id_dtype=ball_id_dtype, - debug=False, - # Not used (but required by TRAVERSAL_PREAMBLE_TEMPLATE) - stick_out_factor=0) - - from pyopencl.tools import VectorArg, ScalarArg - arg_decls = [ - VectorArg(coord_dtype, "box_centers"), - ScalarArg(coord_dtype, "root_extent"), - VectorArg(np.uint8, "box_levels"), - ScalarArg(box_id_dtype, "aligned_nboxes"), - VectorArg(box_id_dtype, "box_child_ids"), - VectorArg(box_flags_enum.dtype, "box_flags"), - VectorArg(peer_list_idx_dtype, "peer_list_starts"), - VectorArg(box_id_dtype, "peer_lists"), - VectorArg(coord_dtype, "ball_radii"), - VectorArg(np.float32, "outer_space_invader_dists"), - ] + [ - VectorArg(coord_dtype, "ball_"+ax) - for ax in AXIS_NAMES[:dimensions]] - - from pyopencl.elementwise import ElementwiseKernel - space_invader_query_kernel = ElementwiseKernel( + return SPACE_INVADER_QUERY_TEMPLATE.generate( self.context, - arguments=arg_decls, - operation=str(template.render(**render_vars)), - name="space_invaders_query", - preamble=preamble.render(**render_vars)) - - logger.info("done building space invader query kernel") - return space_invader_query_kernel + dimensions, + coord_dtype, + box_id_dtype, + ball_id_dtype, + peer_list_idx_dtype, + max_levels) # }}} @@ -910,6 +922,7 @@ class SpaceInvaderQueryBuilder(object): # {{{ peer list build + class PeerListFinder(object): """This class builds a look-up table from box numbers to peer boxes. The full definition [1]_ of a peer box is as follows: diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 98cd590..10da444 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -133,8 +133,10 @@ typedef ${dtype_to_ctype(box_id_dtype)} box_id_t; %if particle_id_dtype is not None: typedef ${dtype_to_ctype(particle_id_dtype)} particle_id_t; %endif +## Convert to dict first, as this may be passed as a tuple-of-tuples. +<% vec_types_dict = dict(vec_types) %> typedef ${dtype_to_ctype(coord_dtype)} coord_t; -typedef ${dtype_to_ctype(vec_types[coord_dtype, dimensions])} coord_vec_t; +typedef ${dtype_to_ctype(vec_types_dict[coord_dtype, dimensions])} coord_vec_t; #define NLEVELS ${max_levels} #define STICK_OUT_FACTOR ((coord_t) ${stick_out_factor}) -- GitLab From a3029c073cc11880fca73e11cabc9b212833d39c Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 7 Sep 2016 19:51:10 -0500 Subject: [PATCH 3/8] Fix ball_nr silliness. --- boxtree/area_query.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/boxtree/area_query.py b/boxtree/area_query.py index c67e8e2..6dd5d6b 100644 --- a/boxtree/area_query.py +++ b/boxtree/area_query.py @@ -295,9 +295,8 @@ AREA_QUERY_TEMPLATE = ( } - void generate(LIST_ARG_DECL USER_ARG_DECL ball_id_t ball_nr) + void generate(LIST_ARG_DECL USER_ARG_DECL ball_id_t i) { - ball_id_t i = ball_nr; """ + AREA_QUERY_WALKER_BODY + """ -- GitLab From cee840aca356359451002a74aa6372353131a384 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 15 Sep 2016 17:15:47 -0500 Subject: [PATCH 4/8] Various interface updates. --- boxtree/area_query.py | 71 ++++++++++++++++++------------------------- boxtree/tools.py | 43 ++++++++++++++++++++++++++ test/test_tree.py | 62 +++++++++++++++++++++++++++++++++++++ 3 files changed, 134 insertions(+), 42 deletions(-) diff --git a/boxtree/area_query.py b/boxtree/area_query.py index 6dd5d6b..b0cd2aa 100644 --- a/boxtree/area_query.py +++ b/boxtree/area_query.py @@ -145,7 +145,7 @@ GUIDING_BOX_FINDER_MACRO = r"""//CL:mako// ${walk_init(0)} box_id_t guiding_box; - if (LEVEL_TO_RAD(0) < ball_radius / 2 || !(box_flags[0] & BOX_HAS_CHILDREN)) + if (LEVEL_TO_RAD(0) < ${ball_radius} / 2 || !(box_flags[0] & BOX_HAS_CHILDREN)) { guiding_box = 0; continue_walk = false; @@ -172,7 +172,7 @@ GUIDING_BOX_FINDER_MACRO = r"""//CL:mako// coord_t max_dist = 0; %for i in range(dimensions): max_dist = fmax(max_dist, - fabs(ball_center.s${i} - child_center.s${i})); + distance(${ball_center}.s${i}, child_center.s${i})); %endfor contains_ball_center = max_dist <= child_rad; @@ -180,7 +180,8 @@ GUIDING_BOX_FINDER_MACRO = r"""//CL:mako// if (contains_ball_center) { - if ((child_rad / 2 < ball_radius && ball_radius <= child_rad) || + if ((child_rad / 2 < ${ball_radius} + && ${ball_radius} <= child_rad) || !(box_flags[child_box_id] & BOX_HAS_CHILDREN)) { guiding_box = child_box_id; @@ -210,10 +211,8 @@ GUIDING_BOX_FINDER_MACRO = r"""//CL:mako// AREA_QUERY_WALKER_BODY = r""" coord_vec_t ball_center; - ${get_ball_center("ball_center", "i")} - coord_t ball_radius; - ${get_ball_radius("ball_radius", "i")} + ${get_ball_center_and_radius("ball_center", "ball_radius", "i")} /////////////////////////////////// // Step 1: Find the guiding box. // @@ -391,6 +390,7 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t box_id) from pyopencl.elementwise import ElementwiseTemplate +from boxtree.tools import InlineBinarySearch STARTS_EXPANDER_TEMPLATE = ElementwiseTemplate( @@ -401,36 +401,28 @@ STARTS_EXPANDER_TEMPLATE = ElementwiseTemplate( """, operation=r"""//CL// /* Find my index in starts, place the index in dst. */ - idx_t l_idx = 0, r_idx = starts_len - 1, my_idx; + dst[i] = bsearch(starts, starts_len, i); + """, + name="starts_expander", + preamble=str(InlineBinarySearch("idx_t"))) - for (;;) - { - my_idx = (l_idx + r_idx) / 2; - if (starts[my_idx] <= i && i < starts[my_idx + 1]) - { - dst[i] = my_idx; - break; - } - - if (starts[my_idx] > i) - { - r_idx = my_idx - 1; - } - else - { - l_idx = my_idx + 1; - } - } - """, - name="starts_expander") +def unwrap_args(tree, peer_lists, *args): + return (tree.box_centers, + tree.root_extent, + tree.box_levels, + tree.aligned_nboxes, + tree.box_child_ids, + tree.box_flags, + peer_lists.peer_list_starts, + peer_lists.peer_lists) + args class AreaQueryElementwiseTemplate(object): # FIXME: Document. - def __init__(self, extra_args, ball_center_expr, ball_radius_expr, - leaf_found_op, name="area_query_elwise"): + def __init__(self, extra_args, ball_center_and_radius_expr, + leaf_found_op, preamble="", name="area_query_elwise"): def wrap_in_macro(decl, expr): return """ @@ -453,17 +445,18 @@ class AreaQueryElementwiseTemplate(object): box_id_t *peer_lists, """ + extra_args, operation="//CL:mako//\n" + - wrap_in_macro("get_ball_center(ball_center, i)", ball_center_expr) + - wrap_in_macro("get_ball_radius(ball_radius, i)", ball_radius_expr) + + wrap_in_macro("get_ball_center_and_radius(ball_center, ball_radius, i)", + ball_center_and_radius_expr) + wrap_in_macro("leaf_found_op(leaf_box_id, ball_center, ball_radius)", leaf_found_op) + TRAVERSAL_PREAMBLE_MAKO_DEFS + GUIDING_BOX_FINDER_MACRO + AREA_QUERY_WALKER_BODY, - name=name) + name=name, + preamble=preamble) def generate(self, context, - dimensions, coord_dtype, box_id_dtype, ball_id_dtype, + dimensions, coord_dtype, box_id_dtype, peer_list_idx_dtype, max_levels, extra_var_values=(), extra_type_aliases=(), extra_preamble=""): @@ -482,7 +475,6 @@ class AreaQueryElementwiseTemplate(object): ("AXIS_NAMES", AXIS_NAMES), ("box_flags_enum", box_flags_enum), ("peer_list_idx_dtype", peer_list_idx_dtype), - ("ball_id_dtype", ball_id_dtype), ("debug", False), # Not used (but required by TRAVERSAL_PREAMBLE_TEMPLATE) ("stick_out_factor", 0), @@ -505,7 +497,6 @@ class AreaQueryElementwiseTemplate(object): type_aliases=( ("coord_t", coord_dtype), ("box_id_t", box_id_dtype), - ("ball_id_t", ball_id_dtype), ("peer_list_idx_t", peer_list_idx_dtype), ("box_level_t", np.uint8), ("box_flags_t", box_flags_enum.dtype), @@ -522,8 +513,8 @@ SPACE_INVADER_QUERY_TEMPLATE = AreaQueryElementwiseTemplate( coord_t *ball_${ax}, %endfor """, - ball_radius_expr="${ball_radius} = ball_radii[${i}];", - ball_center_expr=r""" + ball_center_and_radius_expr=r""" + ${ball_radius} = ball_radii[${i}]; %for ax in AXIS_NAMES[:dimensions]: ${ball_center}.${ax} = ball_${ax}[${i}]; %endfor @@ -710,7 +701,6 @@ class AreaQueryBuilder(object): # {{{ area query transpose (leaves-to-balls) lookup build - class LeavesToBallsLookupBuilder(object): """Given a set of :math:`l^\infty` "balls", this class helps build a look-up table from leaf boxes to balls that overlap with each leaf box. @@ -811,7 +801,6 @@ class LeavesToBallsLookupBuilder(object): # {{{ space invader query build - class SpaceInvaderQueryBuilder(object): """Given a set of :math:`l^\infty` "balls", this class helps build a look-up table from leaf box to max center-to-center distance with an overlapping @@ -869,8 +858,6 @@ class SpaceInvaderQueryBuilder(object): if ball_radii.dtype != tree.coord_dtype: raise TypeError("ball_radii dtype must match tree.coord_dtype") - ball_id_dtype = tree.particle_id_dtype # ? - from pytools import div_ceil # Avoid generating too many kernels. max_levels = div_ceil(tree.nlevels, 10) * 10 @@ -883,7 +870,7 @@ class SpaceInvaderQueryBuilder(object): raise ValueError("size of peer lists must match with number of boxes") space_invader_query_kernel = self.get_space_invader_query_kernel( - tree.dimensions, tree.coord_dtype, tree.box_id_dtype, ball_id_dtype, + tree.dimensions, tree.coord_dtype, tree.box_id_dtype, peer_lists.peer_list_starts.dtype, max_levels) logger.info("space invader query: run space invader query") diff --git a/boxtree/tools.py b/boxtree/tools.py index 638c5fa..b048712 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -501,4 +501,47 @@ class MapValuesKernel(object): # }}} + +# {{{ binary search + +from mako.template import Template + + +BINARY_SEARCH_TEMPLATE = Template(""" +inline size_t bsearch(__global ${idx_t} *starts, size_t len, ${idx_t} val) +{ + size_t l_idx = 0, r_idx = len - 1, my_idx; + for (;;) + { + my_idx = (l_idx + r_idx) / 2; + + if (starts[my_idx] <= val && val < starts[my_idx + 1]) + { + return my_idx; + } + + if (starts[my_idx] > val) + { + r_idx = my_idx - 1; + } + else + { + l_idx = my_idx + 1; + } + } +} +""") + + +class InlineBinarySearch(object): + + def __init__(self, idx_t): + self.idx_t = idx_t + + @memoize_method + def __str__(self): + return BINARY_SEARCH_TEMPLATE.render(idx_t=self.idx_t) + +# }}} + # vim: foldmethod=marker:filetype=pyopencl diff --git a/test/test_tree.py b/test/test_tree.py index 5e8bb3b..25e0940 100644 --- a/test/test_tree.py +++ b/test/test_tree.py @@ -802,6 +802,68 @@ def test_space_invader_query(ctx_getter, dims, dtype, do_plot=False): # }}} +# {{{ particle query test + +@pytest.mark.opencl +@pytest.mark.area_query +@pytest.mark.parametrize("dims", [2, 3]) +def test_particle_query(ctx_getter, dims, do_plot=False): + logging.basicConfig(level=logging.INFO) + + ctx = ctx_getter() + queue = cl.CommandQueue(ctx) + + nparticles = 5 * 10**3 + dtype = np.float64 + + particles = make_normal_particle_array(queue, nparticles, dims, dtype) + + if do_plot: + import matplotlib.pyplot as pt + pt.plot(particles[0].get(), particles[1].get(), "x") + + from boxtree import TreeBuilder + tb = TreeBuilder(ctx) + + queue.finish() + tree, _ = tb(queue, particles, max_particles_in_box=30, debug=True, prune_empty=False) + + from boxtree.particle_query import ( + ParticleQueryBuilder, LeavesToParticlesLookupBuilder) + pqb = ParticleQueryBuilder(ctx) + l2pl = LeavesToParticlesLookupBuilder(ctx) + + pq, _ = pqb(queue, tree, particles) + l2p, _ = l2pl(queue, tree, particles) + + # Get data to host for test. + tree = tree.get(queue=queue) + pq = pq.get(queue=queue) + l2p = l2p.get(queue=queue) + + from boxtree import box_flags_enum + leaf_boxes, = (tree.box_flags & box_flags_enum.HAS_CHILDREN == 0).nonzero() + + # Check if we recover the user source orders. + + assert set(pq) <= set(leaf_boxes), set(pq) - set(leaf_boxes) + + pq = np.array(sorted(np.arange(nparticles), + # box_source_starts tells us how the boxes should + # be sorted relative to each other. + key=lambda particle: tree.box_source_starts[pq[particle]])) + assert (tree.user_source_ids == pq).all() + + for leaf in leaf_boxes: + leaf_range = range(*l2p.particles_in_leaf_starts[leaf:leaf+2]) + tree_leaf_range = range(tree.box_source_starts[leaf], + tree.box_source_starts[leaf] + + tree.box_source_counts_cumul[leaf]) + assert set(l2p.particles_in_leaf_lists[leaf_range]) == \ + set(tree.user_source_ids[tree_leaf_range]) + +# }}} + # {{{ level restriction test @pytest.mark.opencl -- GitLab From 5f62af01af8c4d3891067ac8f821ca31367fee0b Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 28 Sep 2016 13:55:23 -0500 Subject: [PATCH 5/8] Remove space invader query for now, as it doesn't seem to be needed. --- boxtree/area_query.py | 160 ------------------------------------------ test/test_tree.py | 78 -------------------- 2 files changed, 238 deletions(-) diff --git a/boxtree/area_query.py b/boxtree/area_query.py index b0cd2aa..209dc29 100644 --- a/boxtree/area_query.py +++ b/boxtree/area_query.py @@ -54,12 +54,6 @@ Inverse of area query (Leaves -> overlapping balls) .. autoclass:: LeavesToBallsLookup -Space invader queries -^^^^^^^^^^^^^^^^^^^^^ - -.. autoclass:: SpaceInvaderQueryBuilder - - Peer Lists ^^^^^^^^^^ @@ -504,53 +498,6 @@ class AreaQueryElementwiseTemplate(object): var_values=render_vars + extra_var_values, more_preamble=preamble + extra_preamble) - -SPACE_INVADER_QUERY_TEMPLATE = AreaQueryElementwiseTemplate( - extra_args=""" - coord_t *ball_radii, - float *outer_space_invader_dists, - %for ax in AXIS_NAMES[:dimensions]: - coord_t *ball_${ax}, - %endfor - """, - ball_center_and_radius_expr=r""" - ${ball_radius} = ball_radii[${i}]; - %for ax in AXIS_NAMES[:dimensions]: - ${ball_center}.${ax} = ball_${ax}[${i}]; - %endfor - """, - leaf_found_op=r""" - { - ${load_center("leaf_center", leaf_box_id)} - int leaf_level = box_levels[${leaf_box_id}]; - - coord_t size_sum = LEVEL_TO_RAD(leaf_level) + ${ball_radius}; - - coord_t max_dist = 0; - %for i in range(dimensions): - max_dist = fmax(max_dist, - fabs(${ball_center}.s${i} - leaf_center.s${i})); - %endfor - - bool is_overlapping = max_dist <= size_sum; - - if (is_overlapping) - { - // The atomic max operation supports only integer types. - // However, max_dist is of a floating point type. - // For comparison purposes we reinterpret the bits of max_dist - // as an integer. The comparison result is the same as for positive - // IEEE floating point numbers, so long as the float/int endianness - // matches (fingers crossed). - atomic_max( - (volatile __global int *) - &outer_space_invader_dists[${leaf_box_id}], - as_int((float) max_dist)); - } - } - """, - name="space_invader_query") - # }}} @@ -799,113 +746,6 @@ class LeavesToBallsLookupBuilder(object): # }}} -# {{{ space invader query build - -class SpaceInvaderQueryBuilder(object): - """Given a set of :math:`l^\infty` "balls", this class helps build a look-up - table from leaf box to max center-to-center distance with an overlapping - ball. - - .. automethod:: __call__ - - """ - def __init__(self, context): - self.context = context - self.peer_list_finder = PeerListFinder(self.context) - - # {{{ Kernel generation - - @memoize_method - def get_space_invader_query_kernel(self, dimensions, coord_dtype, - box_id_dtype, ball_id_dtype, peer_list_idx_dtype, max_levels): - return SPACE_INVADER_QUERY_TEMPLATE.generate( - self.context, - dimensions, - coord_dtype, - box_id_dtype, - ball_id_dtype, - peer_list_idx_dtype, - max_levels) - - # }}} - - def __call__(self, queue, tree, ball_centers, ball_radii, peer_lists=None, - wait_for=None): - """:arg queue: a :class:`pyopencl.CommandQueue` - :arg tree: a :class:`boxtree.Tree`. - :arg ball_centers: an object array of coordinate - :class:`pyopencl.array.Array` instances. - Their *dtype* must match *tree*'s - :attr:`boxtree.Tree.coord_dtype`. - :arg ball_radii: a - :class:`pyopencl.array.Array` - of positive numbers. - Its *dtype* must match *tree*'s - :attr:`boxtree.Tree.coord_dtype`. - :arg peer_lists: may either be *None* or an instance of - :class:`PeerListLookup` associated with `tree`. - :arg wait_for: may either be *None* or a list of :class:`pyopencl.Event` - instances for whose completion this command waits before starting - execution. - :returns: a tuple *(sqi, event)*, where *sqi* is an instance of - :class:`pyopencl.array`, and *event* is a :class:`pyopencl.Event` - for dependency management. - """ - - from pytools import single_valued - if single_valued(bc.dtype for bc in ball_centers) != tree.coord_dtype: - raise TypeError("ball_centers dtype must match tree.coord_dtype") - if ball_radii.dtype != tree.coord_dtype: - raise TypeError("ball_radii dtype must match tree.coord_dtype") - - from pytools import div_ceil - # Avoid generating too many kernels. - max_levels = div_ceil(tree.nlevels, 10) * 10 - - if peer_lists is None: - peer_lists, evt = self.peer_list_finder(queue, tree, wait_for=wait_for) - wait_for = [evt] - - if len(peer_lists.peer_list_starts) != tree.nboxes + 1: - raise ValueError("size of peer lists must match with number of boxes") - - space_invader_query_kernel = self.get_space_invader_query_kernel( - tree.dimensions, tree.coord_dtype, tree.box_id_dtype, - peer_lists.peer_list_starts.dtype, max_levels) - - logger.info("space invader query: run space invader query") - - outer_space_invader_dists = cl.array.zeros(queue, tree.nboxes, np.float32) - wait_for.extend(outer_space_invader_dists.events) - - evt = space_invader_query_kernel( - tree.box_centers, tree.root_extent, - tree.box_levels, tree.aligned_nboxes, - tree.box_child_ids, tree.box_flags, - peer_lists.peer_list_starts, - peer_lists.peer_lists, - ball_radii, - outer_space_invader_dists, - *tuple(bc for bc in ball_centers), - wait_for=wait_for, - queue=queue, - slice=slice(len(ball_radii))) - - if tree.coord_dtype != np.dtype(np.float32): - # The kernel output is always an array of float32 due to limited - # support for atomic operations with float64 in OpenCL. - # Here the output is cast to match the coord dtype. - outer_space_invader_dists.finish() - outer_space_invader_dists = outer_space_invader_dists.astype( - tree.coord_dtype) - evt, = outer_space_invader_dists.events - - logger.info("space invader query: done") - - return outer_space_invader_dists, evt - -# }}} - # {{{ peer list build diff --git a/test/test_tree.py b/test/test_tree.py index 25e0940..9e73eec 100644 --- a/test/test_tree.py +++ b/test/test_tree.py @@ -724,84 +724,6 @@ def test_area_query(ctx_getter, dims, do_plot=False): # }}} -# {{{ space invader query test - -@pytest.mark.opencl -@pytest.mark.geo_lookup -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("dims", [2, 3]) -def test_space_invader_query(ctx_getter, dims, dtype, do_plot=False): - logging.basicConfig(level=logging.INFO) - - ctx = ctx_getter() - queue = cl.CommandQueue(ctx) - - if (dtype == np.float32 - and dims == 2 - and queue.device.platform.name == "Portable Computing Language"): - # arg list lenghts disagree - pytest.xfail("2D float doesn't work on POCL") - - dtype = np.dtype(dtype) - nparticles = 10**5 - - particles = make_normal_particle_array(queue, nparticles, dims, dtype) - - if do_plot: - import matplotlib.pyplot as pt - pt.plot(particles[0].get(), particles[1].get(), "x") - - from boxtree import TreeBuilder - tb = TreeBuilder(ctx) - - queue.finish() - tree, _ = tb(queue, particles, max_particles_in_box=30, debug=True) - - nballs = 10**4 - ball_centers = make_normal_particle_array(queue, nballs, dims, dtype) - ball_radii = cl.array.empty(queue, nballs, dtype).fill(0.1) - - from boxtree.area_query import ( - LeavesToBallsLookupBuilder, SpaceInvaderQueryBuilder) - - siqb = SpaceInvaderQueryBuilder(ctx) - # We can use leaves-to-balls lookup to get the set of overlapping balls for - # each box, and from there to compute the outer space invader distance. - lblb = LeavesToBallsLookupBuilder(ctx) - - siq, _ = siqb(queue, tree, ball_centers, ball_radii) - lbl, _ = lblb(queue, tree, ball_centers, ball_radii) - - # get data to host for test - tree = tree.get(queue=queue) - siq = siq.get(queue=queue) - lbl = lbl.get(queue=queue) - - ball_centers = np.array([x.get() for x in ball_centers]) - ball_radii = ball_radii.get() - - # Find leaf boxes. - from boxtree import box_flags_enum - - outer_space_invader_dist = np.zeros(tree.nboxes) - - for ibox in range(tree.nboxes): - # We only want leaves here. - if tree.box_flags[ibox] & box_flags_enum.HAS_CHILDREN: - continue - - start, end = lbl.balls_near_box_starts[ibox:ibox + 2] - space_invaders = lbl.balls_near_box_lists[start:end] - if len(space_invaders) > 0: - outer_space_invader_dist[ibox] = np.max(np.abs( - tree.box_centers[:, ibox].reshape((-1, 1)) - - ball_centers[:, space_invaders])) - - assert np.allclose(siq, outer_space_invader_dist) - -# }}} - - # {{{ particle query test @pytest.mark.opencl -- GitLab From 5ab35a9ed21f979d7307f237060ecb7b4c261981 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 28 Sep 2016 14:18:33 -0500 Subject: [PATCH 6/8] Document area query elementwise template and add test. --- boxtree/area_query.py | 34 +++++++++++------- test/test_tree.py | 84 ++++++++++++++++++++++--------------------- 2 files changed, 65 insertions(+), 53 deletions(-) diff --git a/boxtree/area_query.py b/boxtree/area_query.py index 209dc29..13a6717 100644 --- a/boxtree/area_query.py +++ b/boxtree/area_query.py @@ -132,6 +132,7 @@ class LeavesToBallsLookup(DeviceDataRecord): # }}} + # {{{ kernel templates GUIDING_BOX_FINDER_MACRO = r"""//CL:mako// @@ -139,7 +140,8 @@ GUIDING_BOX_FINDER_MACRO = r"""//CL:mako// ${walk_init(0)} box_id_t guiding_box; - if (LEVEL_TO_RAD(0) < ${ball_radius} / 2 || !(box_flags[0] & BOX_HAS_CHILDREN)) + if (LEVEL_TO_RAD(0) < ${ball_radius} / 2 + || !(box_flags[0] & BOX_HAS_CHILDREN)) { guiding_box = 0; continue_walk = false; @@ -400,20 +402,27 @@ STARTS_EXPANDER_TEMPLATE = ElementwiseTemplate( name="starts_expander", preamble=str(InlineBinarySearch("idx_t"))) +# }}} -def unwrap_args(tree, peer_lists, *args): - return (tree.box_centers, - tree.root_extent, - tree.box_levels, - tree.aligned_nboxes, - tree.box_child_ids, - tree.box_flags, - peer_lists.peer_list_starts, - peer_lists.peer_lists) + args +# {{{ area query elementwise template class AreaQueryElementwiseTemplate(object): - # FIXME: Document. + """ + Experimental: Intended as a way to perform operations in the body of an area + query. + """ + + @staticmethod + def unwrap_args(tree, peer_lists, *args): + return (tree.box_centers, + tree.root_extent, + tree.box_levels, + tree.aligned_nboxes, + tree.box_child_ids, + tree.box_flags, + peer_lists.peer_list_starts, + peer_lists.peer_lists) + args def __init__(self, extra_args, ball_center_and_radius_expr, leaf_found_op, preamble="", name="area_query_elwise"): @@ -646,6 +655,7 @@ class AreaQueryBuilder(object): # }}} + # {{{ area query transpose (leaves-to-balls) lookup build class LeavesToBallsLookupBuilder(object): @@ -746,8 +756,8 @@ class LeavesToBallsLookupBuilder(object): # }}} -# {{{ peer list build +# {{{ peer list build class PeerListFinder(object): """This class builds a look-up table from box numbers to peer boxes. The diff --git a/test/test_tree.py b/test/test_tree.py index 9e73eec..a5dad48 100644 --- a/test/test_tree.py +++ b/test/test_tree.py @@ -721,21 +721,15 @@ def test_area_query(ctx_getter, dims, do_plot=False): actual = near_leaves assert sorted(found) == sorted(actual) -# }}} - - -# {{{ particle query test @pytest.mark.opencl @pytest.mark.area_query @pytest.mark.parametrize("dims", [2, 3]) -def test_particle_query(ctx_getter, dims, do_plot=False): - logging.basicConfig(level=logging.INFO) - +def test_area_query_elwise(ctx_getter, dims, do_plot=False): ctx = ctx_getter() queue = cl.CommandQueue(ctx) - nparticles = 5 * 10**3 + nparticles = 10**5 dtype = np.float64 particles = make_normal_particle_array(queue, nparticles, dims, dtype) @@ -748,44 +742,52 @@ def test_particle_query(ctx_getter, dims, do_plot=False): tb = TreeBuilder(ctx) queue.finish() - tree, _ = tb(queue, particles, max_particles_in_box=30, debug=True, prune_empty=False) - - from boxtree.particle_query import ( - ParticleQueryBuilder, LeavesToParticlesLookupBuilder) - pqb = ParticleQueryBuilder(ctx) - l2pl = LeavesToParticlesLookupBuilder(ctx) - - pq, _ = pqb(queue, tree, particles) - l2p, _ = l2pl(queue, tree, particles) - - # Get data to host for test. - tree = tree.get(queue=queue) - pq = pq.get(queue=queue) - l2p = l2p.get(queue=queue) - - from boxtree import box_flags_enum - leaf_boxes, = (tree.box_flags & box_flags_enum.HAS_CHILDREN == 0).nonzero() - - # Check if we recover the user source orders. - - assert set(pq) <= set(leaf_boxes), set(pq) - set(leaf_boxes) + tree, _ = tb(queue, particles, max_particles_in_box=30, debug=True) - pq = np.array(sorted(np.arange(nparticles), - # box_source_starts tells us how the boxes should - # be sorted relative to each other. - key=lambda particle: tree.box_source_starts[pq[particle]])) - assert (tree.user_source_ids == pq).all() + nballs = 10**4 + ball_centers = make_normal_particle_array(queue, nballs, dims, dtype) + ball_radii = cl.array.empty(queue, nballs, dtype).fill(0.1) - for leaf in leaf_boxes: - leaf_range = range(*l2p.particles_in_leaf_starts[leaf:leaf+2]) - tree_leaf_range = range(tree.box_source_starts[leaf], - tree.box_source_starts[leaf] + - tree.box_source_counts_cumul[leaf]) - assert set(l2p.particles_in_leaf_lists[leaf_range]) == \ - set(tree.user_source_ids[tree_leaf_range]) + from boxtree.area_query import ( + AreaQueryElementwiseTemplate, PeerListFinder) + + template = AreaQueryElementwiseTemplate( + extra_args=""" + coord_t *ball_radii, + %for ax in AXIS_NAMES[:dimensions]: + coord_t *ball_${ax}, + %endfor + """, + ball_center_and_radius_expr=""" + %for ax in AXIS_NAMES[:dimensions]: + ${ball_center}.${ax} = ball_${ax}[${i}]; + %endfor + ${ball_radius} = ball_radii[${i}]; + """, + leaf_found_op="") + + peer_lists, evt = PeerListFinder(ctx)(queue, tree) + + kernel = template.generate( + ctx, + dims, + tree.coord_dtype, + tree.box_id_dtype, + peer_lists.peer_list_starts.dtype, + tree.nlevels) + + evt = kernel( + *template.unwrap_args( + tree, peer_lists, ball_radii, *ball_centers), + queue=queue, + wait_for=[evt], + range=range(len(ball_radii))) + + cl.wait_for_events([evt]) # }}} + # {{{ level restriction test @pytest.mark.opencl -- GitLab From 79b2f4d3093ec9e12f27c2c83307b3f9062e1aec Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 28 Sep 2016 14:33:23 -0500 Subject: [PATCH 7/8] Remove get_ball_center, get_ball_radius; use get_ball_center_and_radius() instead --- boxtree/area_query.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/boxtree/area_query.py b/boxtree/area_query.py index 13a6717..3e7742d 100644 --- a/boxtree/area_query.py +++ b/boxtree/area_query.py @@ -266,13 +266,10 @@ AREA_QUERY_TEMPLATE = ( typedef ${dtype_to_ctype(ball_id_dtype)} ball_id_t; typedef ${dtype_to_ctype(peer_list_idx_dtype)} peer_list_idx_t; - <%def name="get_ball_center(ball_center, i)"> + <%def name="get_ball_center_and_radius(ball_center, ball_radius, i)"> %for ax in AXIS_NAMES[:dimensions]: ${ball_center}.${ax} = ball_${ax}[${i}]; %endfor - - - <%def name="get_ball_radius(ball_radius, i)"> ${ball_radius} = ball_radii[${i}]; -- GitLab From b46e0914b684718730fd5beda05763ce4a795911 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 29 Sep 2016 12:07:22 -0500 Subject: [PATCH 8/8] range=range(...) => range=slice(...) --- test/test_tree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_tree.py b/test/test_tree.py index a5dad48..5f91f88 100644 --- a/test/test_tree.py +++ b/test/test_tree.py @@ -781,7 +781,7 @@ def test_area_query_elwise(ctx_getter, dims, do_plot=False): tree, peer_lists, ball_radii, *ball_centers), queue=queue, wait_for=[evt], - range=range(len(ball_radii))) + range=slice(len(ball_radii))) cl.wait_for_events([evt]) -- GitLab